arm_compute v19.05
diff --git a/tests/validation/CL/ArithmeticAddition.cpp b/tests/validation/CL/ArithmeticAddition.cpp
index 04c9c85..bed04af 100644
--- a/tests/validation/CL/ArithmeticAddition.cpp
+++ b/tests/validation/CL/ArithmeticAddition.cpp
@@ -130,7 +130,7 @@
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
                shape, policy)
 {
     // Create tensors
@@ -155,7 +155,7 @@
 
 FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
                        ArithmeticAdditionQASYMM8Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
                        framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })),
                        framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
                        framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) }))
diff --git a/tests/validation/CL/ArithmeticSubtraction.cpp b/tests/validation/CL/ArithmeticSubtraction.cpp
index a8b54fd..796486b 100644
--- a/tests/validation/CL/ArithmeticSubtraction.cpp
+++ b/tests/validation/CL/ArithmeticSubtraction.cpp
@@ -131,7 +131,7 @@
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
                shape, policy)
 {
     // Create tensors
@@ -156,7 +156,7 @@
 
 FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
                        ArithmeticSubtractionQASYMM8Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
                        framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })),
                        framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
                        framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) }))
diff --git a/tests/validation/CL/BatchToSpaceLayer.cpp b/tests/validation/CL/BatchToSpaceLayer.cpp
index e36663e..30b7817 100644
--- a/tests/validation/CL/BatchToSpaceLayer.cpp
+++ b/tests/validation/CL/BatchToSpaceLayer.cpp
@@ -52,21 +52,28 @@
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32),    // blockx != blocky && blockx > blocky
+                                                       TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32),    // blockx != blocky && blocky > blockx
                                                        TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),     // Mismatching data types
                                                        TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),     // Wrong data type block shape
                                                        TensorInfo(TensorShape(32U, 13U, 2U, 2U, 4U), 1, DataType::F32), // Wrong tensor shape
                                                      }),
                framework::dataset::make("BlockShapeInfo",{ TensorInfo(TensorShape(2U, 2U), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(2U, 4U), 1, DataType::S32),
+                                                      TensorInfo(TensorShape(4U, 2U), 1, DataType::S32),
                                                        TensorInfo(TensorShape(2U, 2U), 1, DataType::S32),
                                                        TensorInfo(TensorShape(2U, 2U), 1, DataType::F16),
                                                        TensorInfo(TensorShape(2U, 2U), 1, DataType::S32),
                                                      })),
                framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(64U, 16U, 2U, 1U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 32U, 2U, 1U), 1, DataType::F32),
+
                                                        TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F16),
                                                        TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
                                                      })),
-               framework::dataset::make("Expected", { true, false, false, false})),
+               framework::dataset::make("Expected", { true, true,true, false, false, false})),
                input_info, block_shape_info, output_info, expected)
 {
     bool has_error = bool(CLBatchToSpaceLayer::validate(&input_info.clone()->set_is_resizable(false), &block_shape_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false)));
@@ -74,18 +81,22 @@
 }
 DATA_TEST_CASE(ValidateStatic, framework::DatasetMode::ALL, zip(zip(zip(zip(
                framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32),    // blockx != blocky && blockx > blocky
+                                                       TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32),    // blockx != blocky && blocky > blockx
                                                        TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32),    // Mismatching data types
                                                        TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32),    // Negative block shapes
                                                        TensorInfo(TensorShape(32U, 16U, 2U, 4U, 4U), 1, DataType::F32), // Wrong tensor shape
                                                      }),
-               framework::dataset::make("BlockShapeX", { 2, 2, 2, 2 })),
-               framework::dataset::make("BlockShapeY", { 2, 2, -2, 2 })),
+               framework::dataset::make("BlockShapeX", { 2, 4, 2, 2, 2, 2 })),
+               framework::dataset::make("BlockShapeY", { 2, 2, 4, 2, -2, 2 })),
                framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(64U, 16U, 2U, 1U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 32U, 2U, 1U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F16),
                                                        TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(32U, 8U, 2U, 1U), 1, DataType::F32),
                                                      })),
-               framework::dataset::make("Expected", { true, false, false, false})),
+               framework::dataset::make("Expected", { true, true,true, false, false, false})),
                input_info, block_shape_x, block_shape_y, output_info, expected)
 {
     bool has_error = bool(CLBatchToSpaceLayer::validate(&input_info.clone()->set_is_resizable(false), block_shape_x, block_shape_y, &output_info.clone()->set_is_resizable(false)));
diff --git a/tests/validation/CL/Convolution.cpp b/tests/validation/CL/Convolution.cpp
index 0c00ee3..9c33d45 100644
--- a/tests/validation/CL/Convolution.cpp
+++ b/tests/validation/CL/Convolution.cpp
@@ -54,14 +54,14 @@
     CLTensor dst = create_tensor<CLTensor>(shape, output_data_type);
 
     // Create conv matrix
-    int16_t conv[9];
+    std::array<int16_t, 9> conv = { 0 };
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
     // Create and configure function
     CLConvolution3x3 convolution;
-    convolution.configure(&src, &dst, conv, 0, border_mode);
+    convolution.configure(&src, &dst, conv.data(), 0, border_mode);
 
     // Validate valid region
     const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), BorderSize(filter_size / 2));
@@ -122,14 +122,14 @@
     CLTensor dst = create_tensor<CLTensor>(shape, output_data_type);
 
     // Create conv matrix
-    int16_t conv[25];
+    std::array<int16_t, 25> conv = { 0 };
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
     // Create and configure function
     CLConvolution5x5 convolution;
-    convolution.configure(&src, &dst, conv, 0, border_mode);
+    convolution.configure(&src, &dst, conv.data(), 0, border_mode);
 
     // Validate valid region
     const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), BorderSize(filter_size / 2));
@@ -190,14 +190,14 @@
     CLTensor dst = create_tensor<CLTensor>(shape, output_data_type);
 
     // Create conv matrix
-    int16_t conv[49];
+    std::array<int16_t, 49> conv = { 0 };
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
     // Create and configure function
     CLConvolution7x7 convolution;
-    convolution.configure(&src, &dst, conv, 0, border_mode);
+    convolution.configure(&src, &dst, conv.data(), 0, border_mode);
 
     // Validate valid region
     const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), BorderSize(filter_size / 2));
@@ -258,14 +258,14 @@
     CLTensor dst = create_tensor<CLTensor>(shape, output_data_type);
 
     // Create conv matrix
-    int16_t conv[81];
+    std::array<int16_t, 81> conv = { 0 };
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
     // Create and configure function
     CLConvolution9x9 convolution;
-    convolution.configure(&src, &dst, conv, 0, border_mode);
+    convolution.configure(&src, &dst, conv.data(), 0, border_mode);
 
     // Validate valid region
     const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), BorderSize(filter_size / 2));
@@ -328,14 +328,14 @@
     CLTensor dst = create_tensor<CLTensor>(shape, output_data_type);
 
     // Create conv matrix
-    int16_t conv[filter_width * filter_height];
+    std::vector<int16_t> conv(filter_width * filter_height);
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
     // Create and configure function
     CLConvolutionRectangle convolution;
-    convolution.configure(&src, &dst, conv, filter_width, filter_height, 1, border_mode);
+    convolution.configure(&src, &dst, conv.data(), filter_width, filter_height, 1, border_mode);
 
     // Validate valid region
     const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), BorderSize(filter_height / 2, filter_width / 2));
diff --git a/tests/validation/CL/ConvolutionLayer.cpp b/tests/validation/CL/ConvolutionLayer.cpp
index 41d2b7b..f1f9b59 100644
--- a/tests/validation/CL/ConvolutionLayer.cpp
+++ b/tests/validation/CL/ConvolutionLayer.cpp
@@ -46,7 +46,7 @@
 namespace
 {
 constexpr AbsoluteTolerance<float>  absolute_tolerance_float(0.0001f);    /**< Absolute Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
-RelativeTolerance<float>            tolerance_f32(0.05f);                 /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
+RelativeTolerance<float>            tolerance_f32(0.1f);                  /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
 RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
 constexpr AbsoluteTolerance<float>  tolerance_qasymm8(1);                 /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
 constexpr float                     tolerance_num = 0.07f;                /**< Tolerance number */
diff --git a/tests/validation/CL/CropResize.cpp b/tests/validation/CL/CropResize.cpp
new file mode 100644
index 0000000..cacf405
--- /dev/null
+++ b/tests/validation/CL/CropResize.cpp
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLCropResize.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/CropResizeDataset.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/CropResizeFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(CropResize)
+
+RelativeTolerance<float> tolerance_fp32(0.001f);
+
+template <typename T>
+using CLCropResizeFixture = CropResizeFixture<CLTensor, CLAccessor, CLCropResize, T>;
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::U8),  // Invalid input data type.
+                                                       TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::S32), // Invalid box_ind shape.
+                                                       TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::S32), // Invalid output shape.
+                                                       TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::S32), // Invalid output data type.
+                                                       TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::S32), // Invalid output shape.
+                                                       TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::S32), // Invalid boxes shape.
+                                                     }),
+               framework::dataset::make("BoxesInfo",{  TensorInfo(TensorShape(4, 20), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(4, 20), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(4, 20), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(4, 20), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(4, 20), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(4, 20), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(3, 20), 1, DataType::F32),
+                                                     })),
+               framework::dataset::make("BoxIndInfo",{ TensorInfo(TensorShape(20), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(20), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(10), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(20), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(20), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(20), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(20), 1, DataType::S32),
+                                                     })),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(15U, 5, 5, 20U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(15U, 5, 5, 20U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(15U, 5, 5, 20U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(15U, 5, 5, 10U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(15U, 5, 5, 20U), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(5U, 5, 5, 20U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(15U, 5, 5, 20U), 1, DataType::F32),
+                                                     })),
+               framework::dataset::make("Expected", { true, false, false, false, false, false, false})),
+               input, boxes, box_ind, output, expected)
+{
+    ARM_COMPUTE_EXPECT(bool(CLCropResize::validate(&input.clone()->set_data_layout(DataLayout::NHWC).set_is_resizable(false),
+                                                   &boxes.clone()->set_is_resizable(false),
+                                                   &box_ind.clone()->set_is_resizable(false),
+                                                   &output.clone()->set_data_layout(DataLayout::NHWC).set_is_resizable(false),
+                                                   Coordinates2D{ 5, 5 }, InterpolationPolicy::BILINEAR, 100)) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+TEST_SUITE(Float)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+TEST_SUITE(F16)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       CLCropResizeFixture<half>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallCropResizeDataset(),
+                               combine(framework::dataset::make("IsOutOfBounds", { true, false }),
+                                       framework::dataset::make("DataType", DataType::F16))))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01);
+}
+TEST_SUITE_END() // F16
+#endif           /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
+TEST_SUITE(F32)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       CLCropResizeFixture<float>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallCropResizeDataset(),
+                               combine(framework::dataset::make("IsOutOfBounds", { true, false }),
+                                       framework::dataset::make("DataType", DataType::F32))))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01);
+}
+TEST_SUITE_END() // F32
+TEST_SUITE_END() // Float
+
+TEST_SUITE(U16)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       CLCropResizeFixture<uint16_t>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallCropResizeDataset(),
+                               combine(framework::dataset::make("IsOutOfBounds", { true, false }),
+                                       framework::dataset::make("DataType", DataType::U16))))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01);
+}
+TEST_SUITE_END() // U16
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       CLCropResizeFixture<int16_t>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallCropResizeDataset(),
+                               combine(framework::dataset::make("IsOutOfBounds", { true, false }),
+                                       framework::dataset::make("DataType", DataType::S16))))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01);
+}
+TEST_SUITE_END() // S16
+
+TEST_SUITE(U32)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       CLCropResizeFixture<uint32_t>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallCropResizeDataset(),
+                               combine(framework::dataset::make("IsOutOfBounds", { true, false }),
+                                       framework::dataset::make("DataType", DataType::U32))))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01);
+}
+TEST_SUITE_END() // U32
+
+TEST_SUITE(S32)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       CLCropResizeFixture<int32_t>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallCropResizeDataset(),
+                               combine(framework::dataset::make("IsOutOfBounds", { true, false }),
+                                       framework::dataset::make("DataType", DataType::S32))))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01);
+}
+TEST_SUITE_END() // S32
+
+TEST_SUITE_END() // CropResize
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/DeconvolutionLayer.cpp b/tests/validation/CL/DeconvolutionLayer.cpp
index 31852c8..db93beb 100644
--- a/tests/validation/CL/DeconvolutionLayer.cpp
+++ b/tests/validation/CL/DeconvolutionLayer.cpp
@@ -58,10 +58,13 @@
 const auto data3x3_precommit = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadX", 0, 2)
                                * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", { 3 });
 
+const auto data2x2_precommit = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 2) * framework::dataset::make("StrideY", 2) * framework::dataset::make("PadX", 1)
+                               * framework::dataset::make("PadY", 1) * framework::dataset::make("NumKernels", { 3 });
+
 const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 1)
                      * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("NumKernels", { 3 });
 
-const auto data_layouts_dataset = framework::dataset::make("DataLayout", { DataLayout::NCHW });
+const auto data_layouts_dataset = framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC });
 } // namespace
 
 TEST_SUITE(CL)
@@ -119,7 +122,7 @@
                                                 1U,
                                                 0U,
                                             })),
-    framework::dataset::make("Expected", { false, false, false, false, false, true })),
+    framework::dataset::make("Expected", { false, false, false, false, true, true })),
     input_info, weights_info, bias_info, output_info, pad_info, ax, ay, expected)
 {
     bool is_valid = bool(CLDeconvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pad_info, ax, ay));
@@ -135,6 +138,9 @@
 using CLDeconvolutionLayerFixture3x3 = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 3, 3>;
 
 template <typename T>
+using CLDeconvolutionLayerFixture2x2 = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 2, 2>;
+
+template <typename T>
 using CLDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 1, 1>;
 
 TEST_SUITE(Float)
@@ -164,6 +170,15 @@
 }
 TEST_SUITE_END() // W3x3
 
+TEST_SUITE(W2x2)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerFixture2x2<float>, framework::DatasetMode::PRECOMMIT, combine(combine(data2x2_precommit, framework::dataset::make("DataType", DataType::F32)),
+                                                                                                                   data_layouts_dataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END() // W2x2
+
 TEST_SUITE(W1x1)
 FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1<float>, framework::DatasetMode::NIGHTLY, combine(combine(data1x1, framework::dataset::make("DataType", DataType::F32)),
                                                                                                             data_layouts_dataset))
@@ -200,6 +215,15 @@
 }
 TEST_SUITE_END() // W3x3
 
+TEST_SUITE(W2x2)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerFixture2x2<half>, framework::DatasetMode::PRECOMMIT, combine(combine(data2x2_precommit, framework::dataset::make("DataType", DataType::F16)),
+                                                                                                                  data_layouts_dataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+TEST_SUITE_END() // W2x2
+
 TEST_SUITE(W1x1)
 FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1<half>, framework::DatasetMode::NIGHTLY, combine(combine(data1x1, framework::dataset::make("DataType", DataType::F16)), data_layouts_dataset))
 {
@@ -218,6 +242,9 @@
 using CLDeconvolutionLayerQuantizedFixture3x3 = DeconvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 3, 3>;
 
 template <typename T>
+using CLDeconvolutionLayerQuantizedFixture2x2 = DeconvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 2, 2>;
+
+template <typename T>
 using CLDeconvolutionLayerQuantizedFixture1x1 = DeconvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 1, 1>;
 
 TEST_SUITE(Quantized)
@@ -253,6 +280,17 @@
 }
 TEST_SUITE_END() // W3x3
 
+TEST_SUITE(W2x2)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedFixture2x2<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data2x2_precommit, framework::dataset::make("DataType",
+                       DataType::QASYMM8)),
+                       data_layouts_dataset),
+                       framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255.f, 0))))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END() // W2x2
+
 TEST_SUITE(W1x1)
 FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture1x1<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data1x1, framework::dataset::make("DataType",
                                                                                                                        DataType::QASYMM8)),
diff --git a/tests/validation/CL/DepthConcatenateLayer.cpp b/tests/validation/CL/DepthConcatenateLayer.cpp
index 01477f9..8cbfda3 100644
--- a/tests/validation/CL/DepthConcatenateLayer.cpp
+++ b/tests/validation/CL/DepthConcatenateLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,14 +24,14 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h"
+#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/datasets/ShapeDatasets.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
 #include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/DepthConcatenateLayerFixture.h"
+#include "tests/validation/fixtures/ConcatenateLayerFixture.h"
 
 namespace arm_compute
 {
@@ -68,13 +68,13 @@
     inputs_vector_info.emplace_back(std::move(input_info2));
 
     std::vector<ITensorInfo *> inputs_vector_info_raw;
+    inputs_vector_info_raw.reserve(inputs_vector_info.size());
     for(auto &input : inputs_vector_info)
     {
         inputs_vector_info_raw.emplace_back(&input);
     }
 
-    bool is_valid = bool(CLDepthConcatenateLayer::validate(inputs_vector_info_raw,
-                                                           &output_info.clone()->set_is_resizable(false)));
+    bool is_valid = bool(CLConcatenateLayer::validate(inputs_vector_info_raw, &output_info.clone()->set_is_resizable(false), 2));
     ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
@@ -83,37 +83,38 @@
 TEST_CASE(Configuration, framework::DatasetMode::ALL)
 {
     // Create tensors
-    CLTensor src1 = create_tensor<CLTensor>(TensorShape(32U, 32U, 128U), DataType::F32, 1);
-    CLTensor src2 = create_tensor<CLTensor>(TensorShape(32U, 32U, 32U), DataType::F32, 1);
+    CLTensor src1 = create_tensor<CLTensor>(TensorShape(128U, 32U, 32U), DataType::F32, 1);
+    CLTensor src2 = create_tensor<CLTensor>(TensorShape(128U, 32U, 32U), DataType::F32, 1);
+    CLTensor src3 = create_tensor<CLTensor>(TensorShape(128U, 32U, 32U), DataType::F32, 1);
     CLTensor dst;
 
     ARM_COMPUTE_EXPECT(src1.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(src2.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(src3.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
     // Create and configure function
-    CLDepthConcatenateLayer concat_layer;
+    CLConcatenateLayer concat_layer;
 
-    concat_layer.configure({ &src1, &src2 }, &dst);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(TensorShape(32U, 32U, 160U));
-    validate(dst.info()->valid_region(), valid_region);
+    concat_layer.configure({ &src1, &src2, &src3 }, &dst, 2);
 }
-
 template <typename T>
-using CLDepthConcatenateLayerFixture = DepthConcatenateLayerValidationFixture<CLTensor, ICLTensor, CLAccessor, CLDepthConcatenateLayer, T>;
+using CLDepthConcatenateLayerFixture = ConcatenateLayerValidationFixture<CLTensor, ICLTensor, CLAccessor, CLConcatenateLayer, T>;
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConcatenateLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
-                                                                                                                  DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConcatenateLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small3DShapes(), datasets::Tiny4DShapes()),
+                                                                                                                  framework::dataset::make("DataType",
+                                                                                                                          DataType::F16)),
+                                                                                                                  framework::dataset::make("Axis", 2)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
-                                                                                                                DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(concat(datasets::Large3DShapes(), datasets::Small4DShapes()),
+                                                                                                                        framework::dataset::make("DataType",
+                                                                                                                                DataType::F16)),
+                                                                                                                framework::dataset::make("Axis", 2)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -121,14 +122,17 @@
 TEST_SUITE_END()
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
-                                                                                                                   DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small3DShapes(), datasets::Tiny4DShapes()),
+                                                                                                                   framework::dataset::make("DataType",
+                                                                                                                           DataType::F32)),
+                                                                                                                   framework::dataset::make("Axis", 2)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType",
-                                                                                                                 DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::F32)),
+                                                                                                                 framework::dataset::make("Axis", 2)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -136,17 +140,19 @@
 TEST_SUITE_END()
 TEST_SUITE_END()
 
-
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
-                                                                                                                     DataType::QASYMM8)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small3DShapes(), datasets::Tiny4DShapes()),
+                                                                                                                     framework::dataset::make("DataType",
+                                                                                                                             DataType::QASYMM8)),
+                                                                                                                     framework::dataset::make("Axis", 2)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType",
-                                                                                                                   DataType::QASYMM8)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::QASYMM8)),
+                                                                                                                   framework::dataset::make("Axis", 2)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -154,7 +160,6 @@
 TEST_SUITE_END()
 TEST_SUITE_END()
 
-
 TEST_SUITE_END()
 TEST_SUITE_END()
 } // namespace validation
diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
index dd2d9f3..274a0f5 100644
--- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -18,7 +18,7 @@
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONCLCTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
 #include "arm_compute/core/Types.h"
@@ -28,6 +28,7 @@
 #include "tests/CL/CLAccessor.h"
 #include "tests/PaddingCalculator.h"
 #include "tests/datasets/DepthwiseConvolutionLayerDataset.h"
+#include "tests/datasets/DilatedDepthwiseConvolutionLayerDataset.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
@@ -48,6 +49,13 @@
 constexpr float                      tolerance_num = 0.05f;                 /**< Tolerance number */
 
 const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 3 });
+
+//Activation Functions
+const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+{
+    ActivationLayerInfo(),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)
+});
 } // namespace
 
 TEST_SUITE(CL)
@@ -55,7 +63,7 @@
 
 // *INDENT-OFF*
 // clang-format off
-DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
+DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(zip(
                framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32),     // Mismatching data type input/weights
                                                        TensorInfo(TensorShape(32U, 18U, 3U), 1, DataType::F32),     // Mismatching input feature maps
                                                        TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32),     // Unsupported weights dimensions
@@ -66,6 +74,8 @@
                                                        TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32),     // Invalid biases dimensions
                                                        TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32),     // Invalid output size
                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),     // Window shrink
+                                                       TensorInfo(TensorShape(32U, 18U, 8U), 1, DataType::F32),     // patch size bigger than input width
+                                                       TensorInfo(TensorShape(32U, 18U, 8U), 1, DataType::F32),     // dilation < 1
                                                        TensorInfo(TensorShape(32U, 18U, 8U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(50U, 32U, 8U), 1, DataType::QASYMM8),
                                                      }),
@@ -80,6 +90,8 @@
                                                          TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
                                                          TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
                                                          TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
+                                                         TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
+                                                         TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
                                                          TensorInfo(TensorShape(3U, 3U, 24U), 1, DataType::QASYMM8),
                                                        })),
                framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32),
@@ -93,6 +105,8 @@
                                                         TensorInfo(TensorShape(2U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(2U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(16U), 1, DataType::F32),
+                                                        TensorInfo(TensorShape(16U), 1, DataType::F32),
+                                                        TensorInfo(TensorShape(16U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(24U), 1, DataType::S32),
                                                       })),
                framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
@@ -106,6 +120,8 @@
                                                         TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(30U, 16U, 16U), 1, DataType::F32),
+                                                        TensorInfo(TensorShape(30U, 16U, 16U), 1, DataType::F32),
+                                                        TensorInfo(TensorShape(30U, 16U, 16U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(48U, 30U, 24U), 1, DataType::QASYMM8),
                                                       })),
                framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
@@ -120,6 +136,8 @@
                                                       PadStrideInfo(1, 1, 0, 0),
                                                       PadStrideInfo(1, 1, 0, 0),
                                                       PadStrideInfo(1, 1, 0, 0),
+                                                      PadStrideInfo(1, 1, 0, 0),
+                                                      PadStrideInfo(1, 1, 0, 0),
                                                      })),
                framework::dataset::make("DepthMultiplier", { 1,
                                                              1,
@@ -132,6 +150,8 @@
                                                              1,
                                                              1,
                                                              2,
+                                                             2,
+                                                             2,
                                                              3,
                                                             })),
                 framework::dataset::make("ActivationInfo", { ActivationLayerInfo(),
@@ -145,22 +165,41 @@
                                                              ActivationLayerInfo(),
                                                              ActivationLayerInfo(),
                                                              ActivationLayerInfo(),
+                                                             ActivationLayerInfo(),
+                                                             ActivationLayerInfo(),
                                                              ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
                                                            })),
-               framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false, false, true, true })),
-               input_info, weights_info, biases_info, output_info, conv_info, depth_multiplier, act_info, expected)
+                framework::dataset::make("Dilation", { Size2D(1U, 1U),
+                                                       Size2D(1U, 1U),
+                                                       Size2D(1U, 1U),
+                                                       Size2D(1U, 1U),
+                                                       Size2D(1U, 1U),
+                                                       Size2D(1U, 1U),
+                                                       Size2D(1U, 1U),
+                                                       Size2D(1U, 1U),
+                                                       Size2D(1U, 1U),
+                                                       Size2D(1U, 1U),
+                                                       Size2D(20U, 1U),
+                                                       Size2D(0U, 1U),
+                                                       Size2D(1U, 1U),
+                                                       Size2D(1U, 1U),
+                                                       })),
+               framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false, false, false, false, true, true })),
+               input_info, weights_info, biases_info, output_info, conv_info, depth_multiplier, act_info, dilation, expected)
 {
-    bool is_valid = bool(CLDepthwiseConvolutionLayer3x3::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, depth_multiplier, act_info));
+    bool is_valid = bool(CLDepthwiseConvolutionLayer3x3::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, depth_multiplier, act_info,GPUTarget::MIDGARD, dilation));
     ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
 }
 
-DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
+DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
                 framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Mismatching data type input/weights
                                                         TensorInfo(TensorShape(27U, 13U, 3U), 1, DataType::F32),    // Mismatching input feature maps
                                                         TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Mismatching depth multiplier
                                                         TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Invalid biases size
                                                         TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Invalid biases dimensions
                                                         TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Invalid output size
+                                                        TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32),    // patch size bigger than input width
+                                                        TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32),    // dilation < 1
                                                         TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(32U, 13U, 8U), 1, DataType::QASYMM8),
                                                       }),
@@ -171,6 +210,8 @@
                                                           TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
                                                           TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
                                                           TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
+                                                          TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
+                                                          TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
                                                           TensorInfo(TensorShape(3U, 3U, 24U), 1, DataType::QASYMM8),
                                                         })),
                 framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32),
@@ -180,6 +221,8 @@
                                                          TensorInfo(TensorShape(2U, 2U), 1, DataType::F32),
                                                          TensorInfo(TensorShape(2U), 1, DataType::F32),
                                                          TensorInfo(TensorShape(16U), 1, DataType::F32),
+                                                         TensorInfo(TensorShape(16U), 1, DataType::F32),
+                                                         TensorInfo(TensorShape(16U), 1, DataType::F32),
                                                          TensorInfo(TensorShape(24U), 1, DataType::S32),
                                                        })),
                 framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
@@ -189,6 +232,8 @@
                                                          TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
                                                          TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
                                                          TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32),
+                                                         TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32),
+                                                         TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32),
                                                          TensorInfo(TensorShape(32U, 11U, 24U), 1, DataType::QASYMM8),
                                                        })),
                 framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
@@ -198,6 +243,8 @@
                                                        PadStrideInfo(1, 1, 0, 0),
                                                        PadStrideInfo(1, 1, 0, 0),
                                                        PadStrideInfo(1, 1, 0, 0),
+                                                       PadStrideInfo(1, 1, 0, 0),
+                                                       PadStrideInfo(1, 1, 0, 0),
                                                        PadStrideInfo(1, 1, 1, 0),
                                                       })),
                 framework::dataset::make("DepthMultiplier", { 1,
@@ -207,12 +254,25 @@
                                                               1,
                                                               1,
                                                               2,
+                                                              2,
+                                                              2,
                                                               3,
                                                              })),
-                framework::dataset::make("Expected", { false, false, false, false, false, false, true, true })),
-                input_info, weights_info, biases_info, output_info, conv_info, depth_multiplier, expected)
+                framework::dataset::make("Dilation", { Size2D(1U, 1U),
+                                                              Size2D(1U, 1U),
+                                                              Size2D(1U, 1U),
+                                                              Size2D(1U, 1U),
+                                                              Size2D(1U, 1U),
+                                                              Size2D(1U, 1U),
+                                                              Size2D(20U, 1U),
+                                                              Size2D(0U, 1U),
+                                                              Size2D(1U, 1U),
+                                                              Size2D(1U, 1U),
+                                                             })),
+                framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, true, true })),
+                input_info, weights_info, biases_info, output_info, conv_info, depth_multiplier, dilation, expected)
 {
-    bool is_valid = bool(CLDepthwiseConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, depth_multiplier));
+    bool is_valid = bool(CLDepthwiseConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, depth_multiplier,ActivationLayerInfo(), dilation));
     ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
@@ -226,127 +286,269 @@
 TEST_SUITE(W3x3)
 TEST_SUITE(NCHW)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL,
-                       combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
-                                                                          datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()),
-                                               depth_multipliers),
-                                       framework::dataset::make("DataType",
-                                                                DataType::F16)),
-                               framework::dataset::make("DataLayout", DataLayout::NCHW)))
+                       combine(combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                                                                  datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()),
+                                                       depth_multipliers),
+                                               framework::dataset::make("DataType",
+                                                                        DataType::F16)),
+                                       framework::dataset::make("DataLayout", DataLayout::NCHW)),
+                               ActivationFunctionsDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
                                                                                                                     depth_multipliers),
                                                                                                                     framework::dataset::make("DataType",
                                                                                                                             DataType::F16)),
-                                                                                                                    framework::dataset::make("DataLayout", DataLayout::NCHW)))
+                                                                                                                    framework::dataset::make("DataLayout", DataLayout::NCHW)),
+                                                                                                                    ActivationFunctionsDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f16);
 }
-TEST_SUITE_END()
+TEST_SUITE(Dilation)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+                                                                                                                        depth_multipliers),
+                                                                                                                        framework::dataset::make("DataType",
+                                                                                                                                DataType::F16)),
+                                                                                                                        framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+                                                                                                                ActivationFunctionsDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
+                                                                                                                    depth_multipliers),
+                                                                                                                    framework::dataset::make("DataType",
+                                                                                                                            DataType::F16)),
+                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+                                                                                                                    ActivationFunctionsDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+TEST_SUITE_END() // Dilation
+TEST_SUITE_END() // NCHW
 
 TEST_SUITE(NHWC)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL,
-                       combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
-                                               depth_multipliers),
-                                       framework::dataset::make("DataType",
-                                                                DataType::F16)),
-                               framework::dataset::make("DataLayout", DataLayout::NHWC)))
+                       combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                                       depth_multipliers),
+                                               framework::dataset::make("DataType",
+                                                                        DataType::F16)),
+                                       framework::dataset::make("DataLayout", DataLayout::NHWC)),
+                               ActivationFunctionsDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
                                                                                                                     depth_multipliers),
                                                                                                                     framework::dataset::make("DataType",
                                                                                                                             DataType::F16)),
-                                                                                                                    framework::dataset::make("DataLayout", DataLayout::NHWC)))
+                                                                                                                    framework::dataset::make("DataLayout", DataLayout::NHWC)),
+                                                                                                                    ActivationFunctionsDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f16);
 }
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE(Generic)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers),
+TEST_SUITE(Dilation)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+                                                                                                                        depth_multipliers),
                                                                                                                         framework::dataset::make("DataType",
                                                                                                                                 DataType::F16)),
-                                                                                                                framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                                        framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+                                                                                                                ActivationFunctionsDataset))
 {
-    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+    validate(CLAccessor(_target), _reference, tolerance_f16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
                                                                                                                     depth_multipliers),
                                                                                                                     framework::dataset::make("DataType",
                                                                                                                             DataType::F16)),
-                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+                                                                                                                    ActivationFunctionsDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+TEST_SUITE_END() // Dilation
+TEST_SUITE_END() // NHWC
+TEST_SUITE_END() // W3x3
+
+TEST_SUITE(Generic)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+                                                                                                                        depth_multipliers),
+                                                                                                                        framework::dataset::make("DataType",
+                                                                                                                                DataType::F16)),
+                                                                                                                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                ActivationFunctionsDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
 }
-TEST_SUITE_END()
-TEST_SUITE_END()
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+                                                                                                                    depth_multipliers),
+                                                                                                                    framework::dataset::make("DataType",
+                                                                                                                            DataType::F16)),
+                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                    ActivationFunctionsDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+
+TEST_SUITE(Dilation)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+                                                                                                                        depth_multipliers),
+                                                                                                                        framework::dataset::make("DataType",
+                                                                                                                                DataType::F16)),
+                                                                                                                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                ActivationFunctionsDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
+                                                                                                                    depth_multipliers),
+                                                                                                                    framework::dataset::make("DataType",
+                                                                                                                            DataType::F16)),
+                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                    ActivationFunctionsDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+TEST_SUITE_END() // Dilation
+TEST_SUITE_END() // Generic
+TEST_SUITE_END() // FP16
 
 TEST_SUITE(FP32)
 TEST_SUITE(W3x3)
 TEST_SUITE(NCHW)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL,
-                       combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
-                                                                          datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()),
-                                               depth_multipliers),
-                                       framework::dataset::make("DataType",
-                                                                DataType::F32)),
-                               framework::dataset::make("DataLayout", DataLayout::NCHW)))
+                       combine(combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                                                                  datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()),
+                                                       depth_multipliers),
+                                               framework::dataset::make("DataType",
+                                                                        DataType::F32)),
+                                       framework::dataset::make("DataLayout", DataLayout::NCHW)),
+                               ActivationFunctionsDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
                                                                                                                      depth_multipliers),
                                                                                                                      framework::dataset::make("DataType",
                                                                                                                              DataType::F32)),
-                                                                                                                     framework::dataset::make("DataLayout", DataLayout::NCHW)))
+                                                                                                                     framework::dataset::make("DataLayout", DataLayout::NCHW)),
+                                                                                                                     ActivationFunctionsDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-TEST_SUITE_END()
+TEST_SUITE(Dilation)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL,
+                       combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers),
+                                               framework::dataset::make("DataType",
+                                                                        DataType::F32)),
+                                       framework::dataset::make("DataLayout", DataLayout::NCHW)),
+                               ActivationFunctionsDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
+                                                       depth_multipliers),
+                                               framework::dataset::make("DataType",
+                                                                        DataType::F32)),
+                                       framework::dataset::make("DataLayout", DataLayout::NCHW)),
+                               ActivationFunctionsDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+TEST_SUITE_END() // Dilation
+TEST_SUITE_END() // NCHW
 TEST_SUITE(NHWC)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL,
-                       combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
-                                               depth_multipliers),
-                                       framework::dataset::make("DataType",
-                                                                DataType::F32)),
-                               framework::dataset::make("DataLayout", DataLayout::NHWC)))
+                       combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                                       depth_multipliers),
+                                               framework::dataset::make("DataType",
+                                                                        DataType::F32)),
+                                       framework::dataset::make("DataLayout", DataLayout::NHWC)),
+                               ActivationFunctionsDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
                                                                                                                      depth_multipliers),
                                                                                                                      framework::dataset::make("DataType",
                                                                                                                              DataType::F32)),
-                                                                                                                     framework::dataset::make("DataLayout", DataLayout::NHWC)))
+                                                                                                                     framework::dataset::make("DataLayout", DataLayout::NHWC)),
+                                                                                                                     ActivationFunctionsDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE(Dilation)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL,
+                       combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+                                                       depth_multipliers),
+                                               framework::dataset::make("DataType",
+                                                                        DataType::F32)),
+                                       framework::dataset::make("DataLayout", DataLayout::NHWC)),
+                               ActivationFunctionsDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
+                                                       depth_multipliers),
+                                               framework::dataset::make("DataType",
+                                                                        DataType::F32)),
+                                       framework::dataset::make("DataLayout", DataLayout::NHWC)),
+                               ActivationFunctionsDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END() // Dilation
+TEST_SUITE_END() // NHWC
+TEST_SUITE_END() // W3x3
 
 TEST_SUITE(Generic)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+                                                                                                                 depth_multipliers),
                                                                                                                  framework::dataset::make("DataType",
                                                                                                                          DataType::F32)),
-                                                                                                                 framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                                 framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                 ActivationFunctionsDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
                                                                                                                      depth_multipliers),
                                                                                                                      framework::dataset::make("DataType",
                                                                                                                              DataType::F32)),
-                                                                                                                     framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                                     framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                     ActivationFunctionsDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-TEST_SUITE_END()
-TEST_SUITE_END()
-TEST_SUITE_END()
+
+TEST_SUITE(Dilation)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+                                                                                                                 depth_multipliers),
+                                                                                                                 framework::dataset::make("DataType",
+                                                                                                                         DataType::F32)),
+                                                                                                                 framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                 ActivationFunctionsDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
+                                                       depth_multipliers),
+                                               framework::dataset::make("DataType",
+                                                                        DataType::F32)),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END() // Dilation
+TEST_SUITE_END() // Generic
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
 
 template <typename T>
 using CLDepthwiseConvolutionLayerQuantizedFixture = DepthwiseConvolutionLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T>;
@@ -355,49 +557,97 @@
 TEST_SUITE(QASYMM8)
 TEST_SUITE(Generic)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
-                                                       depth_multipliers),
-                                               framework::dataset::make("DataType", DataType::QASYMM8)),
-                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+                                                               depth_multipliers),
+                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
-                                                       depth_multipliers),
-                                               framework::dataset::make("DataType", DataType::QASYMM8)),
-                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+                                                               depth_multipliers),
+                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-TEST_SUITE_END()
+TEST_SUITE(Dilation)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+                                                               depth_multipliers),
+                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
+                                                               depth_multipliers),
+                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // Dilation
+TEST_SUITE_END() // Generic
 TEST_SUITE(W3x3)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
-                                                       depth_multipliers),
-                                               framework::dataset::make("DataType", DataType::QASYMM8)),
-                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                                               depth_multipliers),
+                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
-                                                       depth_multipliers),
-                                               framework::dataset::make("DataType", DataType::QASYMM8)),
-                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+                                                               depth_multipliers),
+                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-TEST_SUITE_END()
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE(Dilation)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+                                                               depth_multipliers),
+                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
+                                                               depth_multipliers),
+                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // Dilation
+TEST_SUITE_END() // W3x3
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // Quantized
 
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // DepthwiseConvolutionLayer
+TEST_SUITE_END() // CL
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/CL/DequantizationLayer.cpp b/tests/validation/CL/DequantizationLayer.cpp
index 5303566..b1b0d81 100644
--- a/tests/validation/CL/DequantizationLayer.cpp
+++ b/tests/validation/CL/DequantizationLayer.cpp
@@ -40,107 +40,94 @@
 {
 namespace validation
 {
-namespace
-{
-const auto DequantizationShapes = concat(datasets::Small3DShapes(),
-                                         datasets::Small4DShapes());
-} // namespace
-
 TEST_SUITE(CL)
 TEST_SUITE(DequantizationLayer)
 
 // *INDENT-OFF*
 // clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
-               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32), // Wrong input data type
-                                                       TensorInfo(TensorShape(16U, 5U, 16U), 1, DataType::U8),       // Invalid shape
-                                                       TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::U8),  // Wrong output data type
-                                                       TensorInfo(TensorShape(16U, 16U, 2U, 5U), 1, DataType::U8),   // Missmatching shapes
-                                                       TensorInfo(TensorShape(17U, 16U, 16U, 5U), 1, DataType::U8),  // Shrink window
-                                                       TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::U8),  // Valid
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),      // Wrong input data type
+                                                       TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8),  // Wrong output data type
+                                                       TensorInfo(TensorShape(16U, 16U, 2U, 5U), 1, DataType::QASYMM8),   // Missmatching shapes
+                                                       TensorInfo(TensorShape(17U, 16U, 16U, 5U), 1, DataType::QASYMM8),  // Valid
+                                                       TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8),  // Valid
                                                      }),
                framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(16U, 5U, 16U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(17U, 16U, 16U, 5U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
                                                      })),
-               framework::dataset::make("MinMax",{ TensorInfo(TensorShape(2U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(2U), 1, DataType::U8),
-                                                       TensorInfo(TensorShape(2U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(2U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(2U), 1, DataType::U8),
-                                                       TensorInfo(TensorShape(2U), 1, DataType::U8),
-                                                     })),
-               framework::dataset::make("Expected", { false, false, false, false, false, true})),
-               input_info, output_info, min_max, expected)
+               framework::dataset::make("Expected", { false, false, false, true, true})),
+               input_info, output_info, expected)
 {
-    ARM_COMPUTE_EXPECT(bool(CLDequantizationLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), &min_max.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(bool(CLDequantizationLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
 // *INDENT-ON*
 
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(DequantizationShapes, framework::dataset::make("DataType", DataType::U8)), shape, data_type)
+DATA_TEST_CASE(Configuration,
+               framework::DatasetMode::ALL,
+               combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::F16, DataType::F32 })),
+               shape, data_type)
 {
-    TensorShape shape_min_max = shape;
-    shape_min_max.set(Window::DimX, 2);
-
-    // Remove Y and Z dimensions and keep the batches
-    shape_min_max.remove_dimension(1);
-    shape_min_max.remove_dimension(1);
-
     // Create tensors
-    CLTensor src     = create_tensor<CLTensor>(shape, data_type);
-    CLTensor dst     = create_tensor<CLTensor>(shape, DataType::F32);
-    CLTensor min_max = create_tensor<CLTensor>(shape_min_max, DataType::F32);
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::QASYMM8, 1, QuantizationInfo(0.5f, -10));
+    CLTensor dst = create_tensor<CLTensor>(shape, data_type);
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(min_max.info()->is_resizable(), framework::LogLevel::ERRORS);
 
     // Create and configure function
     CLDequantizationLayer dequant_layer;
-    dequant_layer.configure(&src, &dst, &min_max);
+    dequant_layer.configure(&src, &dst);
 
     // Validate valid region
     const ValidRegion valid_region = shape_to_valid_region(shape);
     validate(src.info()->valid_region(), valid_region);
     validate(dst.info()->valid_region(), valid_region);
 
-    // Validate valid region of min_max tensor
-    const ValidRegion valid_region_min_max = shape_to_valid_region(shape_min_max);
-    validate(min_max.info()->valid_region(), valid_region_min_max);
-
     // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 4).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-
-    // Validate padding of min_max tensor
-    const PaddingSize padding_min_max = PaddingCalculator(shape_min_max.x(), 2).required_padding();
-    validate(min_max.info()->padding(), padding_min_max);
+    validate(src.info()->padding(), PaddingSize());
+    validate(dst.info()->padding(), PaddingSize());
 }
 
 template <typename T>
 using CLDequantizationLayerFixture = DequantizationValidationFixture<CLTensor, CLAccessor, CLDequantizationLayer, T>;
 
-TEST_SUITE(Integer)
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDequantizationLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small3DShapes(), datasets::Small4DShapes()),
-                                                                                                                   framework::dataset::make("DataType", DataType::U8)))
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDequantizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(),
+                                                                                                                        framework::dataset::make("DataType", DataType::F16)),
+                                                                                                                framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.1f, 128.0f) })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDequantizationLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(concat(datasets::Large3DShapes(), datasets::Large4DShapes()),
-                                                                                                                 framework::dataset::make("DataType", DataType::U8)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDequantizationLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(),
+                                                                                                                      framework::dataset::make("DataType", DataType::F16)),
+                                                                                                              framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.1f, 128.0f) })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-TEST_SUITE_END() // U8
-TEST_SUITE_END() // Integer
+TEST_SUITE_END() // FP16
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDequantizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(),
+                                                                                                                 framework::dataset::make("DataType", DataType::F32)),
+                                                                                                                 framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.1f, 128.0f) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDequantizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(),
+                                                                                                                       framework::dataset::make("DataType", DataType::F32)),
+                                                                                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.1f, 128.0f) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // FP32
 
 TEST_SUITE_END() // DequantizationLayer
 TEST_SUITE_END() // CL
diff --git a/tests/validation/CL/FFT.cpp b/tests/validation/CL/FFT.cpp
new file mode 100644
index 0000000..9fdd85b
--- /dev/null
+++ b/tests/validation/CL/FFT.cpp
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLFFT1D.h"
+#include "arm_compute/runtime/CL/functions/CLFFT2D.h"
+#include "arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/SmallConvolutionLayerDataset.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/FFTFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+const auto data_types = framework::dataset::make("DataType", { DataType::F32 });
+const auto shapes_1d  = framework::dataset::make("TensorShape", { TensorShape(2U, 2U, 3U), TensorShape(3U, 2U, 3U),
+                                                                  TensorShape(4U, 2U, 3U), TensorShape(5U, 2U, 3U),
+                                                                  TensorShape(7U, 2U, 3U), TensorShape(8U, 2U, 3U),
+                                                                  TensorShape(9U, 2U, 3U), TensorShape(25U, 2U, 3U),
+                                                                  TensorShape(49U, 2U, 3U), TensorShape(64U, 2U, 3U),
+                                                                  TensorShape(16U, 2U, 3U), TensorShape(32U, 2U, 3U),
+                                                                  TensorShape(96U, 2U, 2U)
+                                                                });
+const auto shapes_2d = framework::dataset::make("TensorShape", { TensorShape(2U, 2U, 3U), TensorShape(3U, 6U, 3U),
+                                                                 TensorShape(4U, 5U, 3U), TensorShape(5U, 7U, 3U),
+                                                                 TensorShape(7U, 25U, 3U), TensorShape(8U, 2U, 3U),
+                                                                 TensorShape(9U, 16U, 3U), TensorShape(25U, 32U, 3U),
+                                                                 TensorShape(192U, 128U, 2U)
+                                                               });
+
+const auto ActivationFunctionsSmallDataset = framework::dataset::make("ActivationInfo",
+{
+    ActivationLayerInfo(),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f)
+});
+
+RelativeTolerance<float> tolerance_f32(0.1f);   /**< Relative tolerance value for FP32 */
+constexpr float          tolerance_num = 0.07f; /**< Tolerance number */
+
+} // namespace
+TEST_SUITE(CL)
+TEST_SUITE(FFT1D)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(shapes_1d, data_types),
+               shape, data_type)
+{
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, data_type, 2);
+    CLTensor dst = create_tensor<CLTensor>(shape, data_type, 2);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    CLFFT1D fft1d;
+    fft1d.configure(&src, &dst, FFT1DInfo());
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(src.info()->valid_region(), valid_region);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    validate(src.info()->padding(), PaddingSize());
+    validate(dst.info()->padding(), PaddingSize());
+}
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+        framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 2, DataType::F32), // Mismatching data types
+                                                TensorInfo(TensorShape(32U, 13U, 2U), 2, DataType::F32), // Mismatching shapes
+                                                TensorInfo(TensorShape(32U, 13U, 2U), 3, DataType::F32), // Invalid channels
+                                                TensorInfo(TensorShape(32U, 13U, 2U), 2, DataType::F32), // Unsupported axis
+                                                TensorInfo(TensorShape(11U, 13U, 2U), 2, DataType::F32), // Undecomposable FFT
+                                                TensorInfo(TensorShape(25U, 13U, 2U), 2, DataType::F32),
+        }),
+        framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 2, DataType::F16),
+                                                TensorInfo(TensorShape(16U, 13U, 2U), 2, DataType::F32),
+                                                TensorInfo(TensorShape(32U, 13U, 2U), 2, DataType::F32),
+                                                TensorInfo(TensorShape(32U, 13U, 2U), 2, DataType::F32),
+                                                TensorInfo(TensorShape(11U, 13U, 2U), 2, DataType::F32),
+                                                TensorInfo(TensorShape(25U, 13U, 2U), 2, DataType::F32),
+        })),
+        framework::dataset::make("Axis", { 0, 0, 0, 2, 0, 0 })),
+        framework::dataset::make("Expected", { false, false, false, false, false, true })),
+        input_info, output_info, axis, expected)
+{
+    FFT1DInfo desc;
+    desc.axis = axis;
+    const Status s = CLFFT1D::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), desc);
+    ARM_COMPUTE_EXPECT(bool(s) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+template <typename T>
+using CLFFT1DFixture = FFTValidationFixture<CLTensor, CLAccessor, CLFFT1D, FFT1DInfo, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFFT1DFixture<float>, framework::DatasetMode::ALL, combine(shapes_1d, framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num);
+}
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
+
+TEST_SUITE_END() // FFT1D
+
+TEST_SUITE(FFT2D)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(shapes_2d, data_types),
+               shape, data_type)
+{
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, data_type, 2);
+    CLTensor dst = create_tensor<CLTensor>(shape, data_type, 2);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    CLFFT2D fft2d;
+    fft2d.configure(&src, &dst, FFT2DInfo());
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(src.info()->valid_region(), valid_region);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    validate(src.info()->padding(), PaddingSize());
+    validate(dst.info()->padding(), PaddingSize());
+}
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
+        framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 25U, 2U), 2, DataType::F32), // Mismatching data types
+                                                TensorInfo(TensorShape(32U, 25U, 2U), 2, DataType::F32), // Mismatching shapes
+                                                TensorInfo(TensorShape(32U, 25U, 2U), 3, DataType::F32), // Invalid channels
+                                                TensorInfo(TensorShape(32U, 13U, 2U), 2, DataType::F32), // Undecomposable FFT
+                                                TensorInfo(TensorShape(32U, 25U, 2U), 2, DataType::F32),
+        }),
+        framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 25U, 2U), 2, DataType::F16),
+                                                TensorInfo(TensorShape(16U, 25U, 2U), 2, DataType::F32),
+                                                TensorInfo(TensorShape(32U, 25U, 2U), 1, DataType::F32),
+                                                TensorInfo(TensorShape(32U, 13U, 2U), 2, DataType::F32),
+                                                TensorInfo(TensorShape(32U, 25U, 2U), 2, DataType::F32),
+        })),
+        framework::dataset::make("Expected", { false, false, false, false, true })),
+               input_info, output_info, expected)
+{
+    const Status s = CLFFT2D::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), FFT2DInfo());
+    ARM_COMPUTE_EXPECT(bool(s) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+template <typename T>
+using CLFFT2DFixture = FFTValidationFixture<CLTensor, CLAccessor, CLFFT2D, FFT2DInfo, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFFT2DFixture<float>, framework::DatasetMode::ALL, combine(shapes_2d, framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num);
+}
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
+TEST_SUITE_END() // FFT2D
+
+TEST_SUITE(FFTConvolutionLayer)
+
+template <typename T>
+using CLFFTConvolutionLayerFixture = FFTConvolutionValidationFixture<CLTensor, CLAccessor, CLFFTConvolutionLayer, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFFTConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFFTConvolutionLayerDataset(),
+                                                                                                                 framework::dataset::make("DataType", DataType::F32)),
+                                                                                                                 framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                 ActivationFunctionsSmallDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num);
+}
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
+TEST_SUITE_END() // FFTConvolutionLayer
+
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/GEMMLowp.cpp b/tests/validation/CL/GEMMLowp.cpp
index 08641db..efefbd6 100644
--- a/tests/validation/CL/GEMMLowp.cpp
+++ b/tests/validation/CL/GEMMLowp.cpp
@@ -28,6 +28,7 @@
 #include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/PaddingCalculator.h"
+#include "tests/datasets/GEMMLowpFusedOffsetOutputDataset.h"
 #include "tests/datasets/LargeGEMMLowpDataset.h"
 #include "tests/datasets/ShapeDatasets.h"
 #include "tests/datasets/SmallGEMMLowpDataset.h"
@@ -83,6 +84,21 @@
     validate(CLAccessor(_target), _reference);
 }
 
+using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore>;
+TEST_SUITE(FusedOffsetOutput)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpFusedOffsetOutputDataset())
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpFusedOffsetOutputDataset())
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // FusedOffsetOutput
+
 TEST_SUITE(Output3D)
 using CLGEMMLowpMatrixMultiplyCoreOutput3DFixture = GEMMLowpMatrixMultiplyCoreValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, true>;
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreOutput3DFixture, framework::DatasetMode::PRECOMMIT, datasets::SmallGEMMLowpOutput3DDataset())
diff --git a/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp b/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp
index 62b0d02..60b92bd 100644
--- a/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp
+++ b/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp
@@ -192,7 +192,7 @@
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-TEST_SUITE_END() // GEMMLowpMatrixMulipltyReshaped
+TEST_SUITE_END() // GEMMLowpMatrixMultiplyReshaped
 TEST_SUITE_END() // CL
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp b/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp
new file mode 100644
index 0000000..a907c5b
--- /dev/null
+++ b/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/CL/Helper.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/GEMMLowpFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+using namespace arm_compute::misc::shape_calculator;
+
+// Create function for CLGEMMReshapeRHSMatrixKernel
+using CLGEMMReshapeRHSMatrix = CLSynthetizeFunction<CLGEMMReshapeRHSMatrixKernel>;
+
+// Create function for CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel
+using CLGEMMLowpMatrixMultiplyReshapedOnlyRHS = CLSynthetizeFunction<CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel>;
+
+// Fixture for CLGEMMLowpMatrixMultiplyReshapedOnlyRHS
+using CLGEMMLowpMatrixMultiplyReshapedOnlyRHSFixture = GEMMLowpMatrixMultiplyReshapedOnlyRHSValidationFixture<CLTensor, CLAccessor, CLGEMMReshapeRHSMatrix, CLGEMMLowpMatrixMultiplyReshapedOnlyRHS>;
+
+// Fixture for CLGEMMLowpMatrixMultiplyReshapedOnlyRHS3D
+using CLGEMMLowpMatrixMultiplyReshapedOnlyRHS3DFixture =
+    GEMMLowpMatrixMultiplyReshapedOnlyRHS3DValidationFixture<CLTensor, CLAccessor, CLGEMMReshapeRHSMatrix, CLGEMMLowpMatrixMultiplyReshapedOnlyRHS>;
+
+namespace
+{
+// *INDENT-OFF*
+// clang-format off
+
+/** M values to test */
+const auto m_values = framework::dataset::make("M", 37);
+
+/** M_W values to test */
+const auto m_w_values = framework::dataset::make("M_W", 5);
+
+/** M_H values to test */
+const auto m_h_values = framework::dataset::make("M_H", 7);
+
+/** N values to test */
+const auto n_values = framework::dataset::make("N", 51);
+
+/** K values to test */
+const auto k_values = framework::dataset::make("K", 23);
+
+/** Batch size values to test */
+const auto b_values = framework::dataset::make("batch_size", 1, 3);
+
+/** M0 values to test - Precommit */
+const auto m0_values_precommit = framework::dataset::make("M0", {4, 6});
+
+/** N0 values to test - Precommit */
+const auto n0_values_precommit = framework::dataset::make("N0", { 2, 4 });
+
+/** K0 values to test - Precommit */
+const auto k0_values_precommit = framework::dataset::make("K0", { 4 });
+
+/** H0 values to test - Precommit */
+const auto h0_values_precommit = framework::dataset::make("H0", 1, 3);
+
+/** M0 values to test - Nightly */
+const auto m0_values_nightly = framework::dataset::make("M0", 2, 8);
+
+/** N0 values to test - Nightly */
+const auto n0_values_nightly = framework::dataset::make("N0", { 2, 3, 4, 8 });
+
+/** K0 values to test - Nightly */
+const auto k0_values_nightly = framework::dataset::make("K0", { 2, 3, 4, 8 });
+
+/** H0 values to test - Nightly */
+const auto h0_values_nightly = framework::dataset::make("H0", 1, 4);
+
+/** Interleave values to test with RHS matrix */
+const auto i_values_rhs = framework::dataset::make("interleave_rhs", { true, false });
+
+/** Transpose values to test with RHS matrix */
+const auto t_values_rhs = framework::dataset::make("transpose_rhs", { true });
+
+/** Configuration test */
+void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned int k_value, unsigned int b_value, unsigned int m0_value, unsigned int n0_value, unsigned int k0_value, unsigned int h0_value, bool i_value_rhs)
+{
+    const unsigned int M = m_value;
+    const unsigned int N = n_value;
+    const unsigned int K = k_value;
+
+    GEMMLHSMatrixInfo lhs_info;
+    lhs_info.m0         = m0_value;
+    lhs_info.k0         = k0_value;
+
+    GEMMRHSMatrixInfo rhs_info;
+    rhs_info.n0         = n0_value;
+    rhs_info.k0         = k0_value;
+    rhs_info.h0         = h0_value;
+    rhs_info.interleave = i_value_rhs;
+    rhs_info.transpose  = true;
+
+    GEMMReshapeInfo gemm_info(M, N, K);
+
+    const TensorShape lhs_shape(K, M, b_value);
+    const TensorShape rhs_shape(N, K, b_value);
+    const TensorShape rhs_shape_reshaped = compute_rhs_reshaped_shape(TensorInfo(rhs_shape, 1, DataType::QASYMM8),
+                                                                      rhs_info);
+
+    const TensorShape dst_shape = compute_mm_shape(TensorInfo(lhs_shape, 1, DataType::QASYMM8),
+                                                   TensorInfo(rhs_shape_reshaped, 1, DataType::QASYMM8),
+                                                   gemm_info);
+
+    // Create tensors
+    CLTensor lhs          = create_tensor<CLTensor>(lhs_shape, DataType::QASYMM8);
+    CLTensor rhs_reshaped = create_tensor<CLTensor>(rhs_shape_reshaped, DataType::QASYMM8);
+    CLTensor dst          = create_tensor<CLTensor>(dst_shape, DataType::S32);
+
+    ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    CLGEMMLowpMatrixMultiplyReshapedOnlyRHS gemm;
+    gemm.configure(&lhs, &rhs_reshaped, &dst, lhs_info, rhs_info, gemm_info);
+}
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(GEMMLowpMatrixMultiplyReshapedOnlyRHS)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_values,
+                                                                   n_values),
+                                                                   k_values),
+                                                                   framework::dataset::make("batch_size", 1)),
+                                                                   m0_values_precommit),
+                                                                   n0_values_precommit),
+                                                                   k0_values_precommit),
+                                                                   h0_values_precommit),
+                                                                   i_values_rhs),
+m_value, n_value, k_value, b_value, m0_value, n0_value, k0_value, h0_value, i_value_rhs)
+{
+    validate_configuration(m_value, n_value, k_value, b_value, m0_value, n0_value, k0_value, h0_value, i_value_rhs);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyReshapedOnlyRHSFixture, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_values,
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0_values_precommit),
+                                                                   n0_values_precommit),
+                                                                   k0_values_precommit),
+                                                                   h0_values_precommit),
+                                                                   i_values_rhs),
+                                                                   t_values_rhs))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyReshapedOnlyRHSFixture, framework::DatasetMode::NIGHTLY,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_values,
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0_values_nightly),
+                                                                   n0_values_nightly),
+                                                                   k0_values_nightly),
+                                                                   h0_values_nightly),
+                                                                   i_values_rhs),
+                                                                   t_values_rhs))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMLowpMatrixMultiplyReshapedOnlyRHS3DFixture, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_w_values,
+                                                                   m_h_values),
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0_values_precommit),
+                                                                   n0_values_precommit),
+                                                                   k0_values_precommit),
+                                                                   h0_values_precommit),
+                                                                   i_values_rhs),
+                                                                   t_values_rhs))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMLowpMatrixMultiplyReshapedOnlyRHS3DFixture, framework::DatasetMode::NIGHTLY,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_w_values,
+                                                                   m_h_values),
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0_values_nightly),
+                                                                   n0_values_nightly),
+                                                                   k0_values_nightly),
+                                                                   h0_values_nightly),
+                                                                   i_values_rhs),
+                                                                   t_values_rhs))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // GEMMLowpMatrixMultiplyReshapedOnlyRHS
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
\ No newline at end of file
diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
new file mode 100644
index 0000000..83051d2
--- /dev/null
+++ b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
@@ -0,0 +1,350 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/CL/Helper.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/GEMMFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+using namespace arm_compute::misc::shape_calculator;
+
+// Create function for CLGEMMReshapeRHSMatrixKernel
+using CLGEMMReshapeRHSMatrix = CLSynthetizeFunction<CLGEMMReshapeRHSMatrixKernel>;
+
+// Create function for CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
+using CLGEMMMatrixMultiplyReshapedOnlyRHS = CLSynthetizeFunction<CLGEMMMatrixMultiplyReshapedOnlyRHSKernel>;
+
+// Fixture for CLGEMMMatrixMultiplyReshapedOnlyRHS
+template <typename T>
+using CLGEMMMatrixMultiplyReshapedOnlyRHSFixture = GEMMMatrixMultiplyReshapedOnlyRHSValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshapedOnlyRHS>;
+
+// Fixture for CLGEMMMatrixMultiplyReshapedOnlyRHS3D
+template <typename T>
+using CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixture = GEMMMatrixMultiplyReshapedOnlyRHS3DValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshapedOnlyRHS>;
+
+namespace
+{
+// *INDENT-OFF*
+// clang-format off
+RelativeTolerance<float> rel_tolerance_f32(0.001f);
+constexpr float          abs_tolerance_f32(0.0001f);
+
+RelativeTolerance<half> rel_tolerance_f16(half(0.2));
+constexpr float         tolerance_num_f16 = 0.02f;
+
+/** Alpha values to test - Precommit */
+const auto a_values = framework::dataset::make("alpha", {1.0f, -0.75f} );
+
+/** M values to test */
+const auto m_values = framework::dataset::make("M", 37);
+
+/** M_W values to test */
+const auto m_w_values = framework::dataset::make("M_W", 5);
+
+/** M_H values to test */
+const auto m_h_values = framework::dataset::make("M_H", 7);
+
+/** N values to test */
+const auto n_values = framework::dataset::make("N", 51);
+
+/** K values to test */
+const auto k_values = framework::dataset::make("K", 23);
+
+/** Batch size values to test */
+const auto b_values = framework::dataset::make("batch_size", 1, 3);
+
+/** M0 values to test - Precommit */
+const auto m0_values_precommit = framework::dataset::make("M0", {4, 6});
+
+/** N0 values to test - Precommit */
+const auto n0_values_precommit = framework::dataset::make("N0", { 2, 4 });
+
+/** K0 values to test - Precommit */
+const auto k0_values_precommit = framework::dataset::make("K0", { 4 });
+
+/** H0 values to test - Precommit */
+const auto h0_values_precommit = framework::dataset::make("H0", 1, 3);
+
+/** M0 values to test - Nightly */
+const auto m0_values_nightly = framework::dataset::make("M0", 1, 8);
+
+/** N0 values to test - Nightly */
+const auto n0_values_nightly = framework::dataset::make("N0", { 2, 3, 4, 8 });
+
+/** K0 values to test - Nightly */
+const auto k0_values_nightly = framework::dataset::make("K0", { 2, 3, 4, 8 });
+
+/** H0 values to test - Nightly */
+const auto h0_values_nightly = framework::dataset::make("H0", 1, 4);
+
+/** Interleave values to test with RHS matrix */
+const auto i_values_rhs = framework::dataset::make("interleave_rhs", { true, false });
+
+/** Transpose values to test with RHS matrix */
+const auto t_values_rhs = framework::dataset::make("transpose_rhs", { true, false });
+
+/** Configuration test */
+void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned int k_value, unsigned int b_value, unsigned int m0_value, unsigned int n0_value, unsigned int k0_value, unsigned int h0_value, bool i_value_rhs, bool t_value_rhs, DataType data_type)
+{
+    const unsigned int M = m_value;
+    const unsigned int N = n_value;
+    const unsigned int K = k_value;
+
+    GEMMLHSMatrixInfo lhs_info;
+    lhs_info.m0         = m0_value;
+    lhs_info.k0         = k0_value;
+
+    GEMMRHSMatrixInfo rhs_info;
+    rhs_info.n0         = n0_value;
+    rhs_info.k0         = k0_value;
+    rhs_info.h0         = h0_value;
+    rhs_info.interleave = i_value_rhs;
+    rhs_info.transpose  = t_value_rhs;
+
+    GEMMReshapeInfo gemm_info(M, N, K);
+
+    const TensorShape lhs_shape(K, M, b_value);
+    const TensorShape rhs_shape(N, K, b_value);
+    const TensorShape rhs_shape_reshaped = compute_rhs_reshaped_shape(TensorInfo(rhs_shape, 1, data_type),
+                                                                      rhs_info);
+
+    const TensorShape dst_shape = compute_mm_shape(TensorInfo(lhs_shape, 1, data_type),
+                                                   TensorInfo(rhs_shape_reshaped, 1, data_type),
+                                                   gemm_info);
+
+    // Create tensors
+    CLTensor lhs          = create_tensor<CLTensor>(lhs_shape, data_type);
+    CLTensor rhs_reshaped = create_tensor<CLTensor>(rhs_shape_reshaped, data_type);
+    CLTensor dst          = create_tensor<CLTensor>(dst_shape, data_type);
+
+    ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    CLGEMMMatrixMultiplyReshapedOnlyRHS gemm;
+    gemm.configure(&lhs, &rhs_reshaped, &dst, 1.0f, lhs_info, rhs_info, gemm_info);
+}
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(GEMMMatrixMultiplyReshapedOnlyRHS)
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_values,
+                                                                   n_values),
+                                                                   k_values),
+                                                                   framework::dataset::make("batch_size", 1)),
+                                                                   m0_values_precommit),
+                                                                   n0_values_precommit),
+                                                                   k0_values_precommit),
+                                                                   h0_values_precommit),
+                                                                   i_values_rhs),
+                                                                   t_values_rhs),
+m_value, n_value, k_value, b_value, m0_value, n0_value, k0_value, h0_value, i_value_rhs, t_value_rhs)
+{
+    validate_configuration(m_value, n_value, k_value, b_value, m0_value, n0_value, k0_value, h0_value, i_value_rhs, t_value_rhs, DataType::F32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture<float>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_values,
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0_values_precommit),
+                                                                   n0_values_precommit),
+                                                                   k0_values_precommit),
+                                                                   h0_values_precommit),
+                                                                   i_values_rhs),
+                                                                   t_values_rhs),
+                                                                   framework::dataset::make("DataType", DataType::F32)),
+                                                                   a_values))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture<float>, framework::DatasetMode::NIGHTLY,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_values,
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0_values_nightly),
+                                                                   n0_values_nightly),
+                                                                   k0_values_nightly),
+                                                                   h0_values_nightly),
+                                                                   i_values_rhs),
+                                                                   t_values_rhs),
+                                                                   framework::dataset::make("DataType", DataType::F32)),
+                                                                   a_values))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixture<float>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_w_values,
+                                                                   m_h_values),
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0_values_precommit),
+                                                                   n0_values_precommit),
+                                                                   k0_values_precommit),
+                                                                   h0_values_precommit),
+                                                                   i_values_rhs),
+                                                                   t_values_rhs),
+                                                                   framework::dataset::make("DataType", DataType::F32)),
+                                                                   a_values))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixture<float>, framework::DatasetMode::NIGHTLY,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_w_values,
+                                                                   m_h_values),
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0_values_nightly),
+                                                                   n0_values_nightly),
+                                                                   k0_values_nightly),
+                                                                   h0_values_nightly),
+                                                                   i_values_rhs),
+                                                                   t_values_rhs),
+                                                                   framework::dataset::make("DataType", DataType::F32)),
+                                                                   a_values))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture<half>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_values,
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0_values_precommit),
+                                                                   n0_values_precommit),
+                                                                   k0_values_precommit),
+                                                                   h0_values_precommit),
+                                                                   i_values_rhs),
+                                                                   t_values_rhs),
+                                                                   framework::dataset::make("DataType", DataType::F16)),
+                                                                   a_values))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedOnlyRHSFixture<half>, framework::DatasetMode::NIGHTLY,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_values,
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0_values_nightly),
+                                                                   n0_values_nightly),
+                                                                   k0_values_nightly),
+                                                                   h0_values_nightly),
+                                                                   i_values_rhs),
+                                                                   t_values_rhs),
+                                                                   framework::dataset::make("DataType", DataType::F16)),
+                                                                   a_values))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixture<half>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_w_values,
+                                                                   m_h_values),
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0_values_precommit),
+                                                                   n0_values_precommit),
+                                                                   k0_values_precommit),
+                                                                   h0_values_precommit),
+                                                                   i_values_rhs),
+                                                                   t_values_rhs),
+                                                                   framework::dataset::make("DataType", DataType::F16)),
+                                                                   a_values))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixture<half>, framework::DatasetMode::NIGHTLY,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_w_values,
+                                                                   m_h_values),
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0_values_nightly),
+                                                                   n0_values_nightly),
+                                                                   k0_values_nightly),
+                                                                   h0_values_nightly),
+                                                                   i_values_rhs),
+                                                                   t_values_rhs),
+                                                                   framework::dataset::make("DataType", DataType::F16)),
+                                                                   a_values))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16);
+}
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // Float
+TEST_SUITE_END() // GEMMMatrixMulipltyReshapedOnlyRHS
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
\ No newline at end of file
diff --git a/tests/validation/CL/HeightConcatenateLayer.cpp b/tests/validation/CL/HeightConcatenateLayer.cpp
new file mode 100644
index 0000000..78182ce
--- /dev/null
+++ b/tests/validation/CL/HeightConcatenateLayer.cpp
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+
+#include "tests/validation/fixtures/ConcatenateLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(HeightConcatenateLayer)
+
+template <typename T>
+using CLHeightConcatenateLayerFixture = ConcatenateLayerValidationFixture<CLTensor, ICLTensor, CLAccessor, CLConcatenateLayer, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLHeightConcatenateLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
+                                                                                                                   framework::dataset::make("DataType",
+                                                                                                                           DataType::F16)),
+                                                                                                                   framework::dataset::make("Axis", 1)))
+
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLHeightConcatenateLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(concat(datasets::Large2DShapes(), datasets::Small4DShapes()),
+                                                                                                                 framework::dataset::make("DataType",
+                                                                                                                         DataType::F16)),
+                                                                                                                 framework::dataset::make("Axis", 1)))
+
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLHeightConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
+                                                                                                                    framework::dataset::make("DataType",
+                                                                                                                            DataType::F32)),
+                                                                                                                    framework::dataset::make("Axis", 1)))
+
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLHeightConcatenateLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType",
+                                                                                                                  DataType::F32)),
+                                                                                                                  framework::dataset::make("Axis", 1)))
+
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLHeightConcatenateLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
+                                                                                                                      framework::dataset::make("DataType",
+                                                                                                                              DataType::QASYMM8)),
+                                                                                                                      framework::dataset::make("Axis", 1)))
+
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLHeightConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType",
+                                                                                                                    DataType::QASYMM8)),
+                                                                                                                    framework::dataset::make("Axis", 1)))
+
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/LSTMLayer.cpp b/tests/validation/CL/LSTMLayer.cpp
index ea20bd6..71a9383 100644
--- a/tests/validation/CL/LSTMLayer.cpp
+++ b/tests/validation/CL/LSTMLayer.cpp
@@ -109,7 +109,7 @@
                                                         TensorInfo(TensorShape(16U, 2U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(16U, 2U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(16U, 2U), 1, DataType::F32),
-                                                        TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+                                                        TensorInfo(TensorShape(11U, 2U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(16U, 2U), 1, DataType::F32),
                })),
                framework::dataset::make("ScratchInfo", { TensorInfo(TensorShape(64U, 2U), 1, DataType::F32),
diff --git a/tests/validation/CL/NonLinearFilter.cpp b/tests/validation/CL/NonLinearFilter.cpp
index 286cc48..536b458 100644
--- a/tests/validation/CL/NonLinearFilter.cpp
+++ b/tests/validation/CL/NonLinearFilter.cpp
@@ -52,8 +52,8 @@
     const uint8_t                          constant_border_value = distribution_u8(generator);
 
     // Create the mask
-    uint8_t mask[mask_size * mask_size];
-    fill_mask_from_pattern(mask, mask_size, mask_size, pattern);
+    std::vector<uint8_t> mask(mask_size * mask_size);
+    fill_mask_from_pattern(mask.data(), mask_size, mask_size, pattern);
     const auto half_mask_size = static_cast<int>(mask_size / 2);
 
     // Create tensors
@@ -65,7 +65,7 @@
 
     // Create and configure function
     CLNonLinearFilter filter;
-    filter.configure(&src, &dst, function, mask_size, pattern, mask, border_mode, constant_border_value);
+    filter.configure(&src, &dst, function, mask_size, pattern, mask.data(), border_mode, constant_border_value);
 
     // Validate valid region
     const ValidRegion dst_valid_region = shape_to_valid_region(shape, border_mode == BorderMode::UNDEFINED, BorderSize(half_mask_size));
diff --git a/tests/validation/CL/PadLayer.cpp b/tests/validation/CL/PadLayer.cpp
index 4bbd7b8..2ad29fc 100644
--- a/tests/validation/CL/PadLayer.cpp
+++ b/tests/validation/CL/PadLayer.cpp
@@ -43,9 +43,9 @@
 const auto PaddingSizesDataset = framework::dataset::make("PaddingSize", { PaddingList{ { 0, 0 } },
     PaddingList{ { 1, 1 } },
     PaddingList{ { 1, 1 }, { 2, 2 } },
-    PaddingList{ { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 } },
-    PaddingList{ { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 2 } },
-    PaddingList{ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 1, 1 } }
+    PaddingList{ { 1, 1 }, { 1, 1 }, { 1, 1 } },
+    PaddingList{ { 0, 0 }, { 1, 0 }, { 0, 1 } },
+    PaddingList{ { 0, 0 }, { 0, 0 }, { 0, 0 } }
 });
 } // namespace
 
@@ -55,32 +55,44 @@
 // *INDENT-OFF*
 // clang-format off
 
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
                framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),     // Mismatching data type input/output
-                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Mismatching shapes with padding
                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Mismatching shapes dimension
                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32)
+                                                       TensorInfo(TensorShape(32U, 13U), 1, DataType::F32)     // Invalid padding list
                                                      }),
                framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16),
                                                        TensorInfo(TensorShape(28U, 11U, 2U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(29U, 17U, 2U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(29U, 15U, 4U, 3U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(27U, 14U, 3U, 4U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(32U, 13U, 2U, 3U), 1, DataType::F32)
+                                                       TensorInfo(TensorShape(29U, 17U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U), 1, DataType::F32)
                                                      })),
                framework::dataset::make("PaddingSize", { PaddingList{{0, 0}},
-                                                      PaddingList{{1, 1}},
-                                                      PaddingList{{1, 1}, {2, 2}},
-                                                      PaddingList{{1,1}, {1,1}, {1,1}, {1,1}},
-                                                      PaddingList{{0,0}, {1,0}, {0,1}, {1,2}},
-                                                      PaddingList{{0,0}, {0,0}, {0,0}, {1,1}}
-                                                      })),
-               framework::dataset::make("Expected", { false, false, true, true, true, true })),
-               input_info, output_info, padding, expected)
+                                                         PaddingList{{1, 1}},
+                                                         PaddingList{{1, 1}, {2, 2}},
+                                                         PaddingList{{1,1}, {1,1}, {1,1}},
+                                                         PaddingList{{1, 1}, {2, 2}},
+                                                         PaddingList{{0,0}, {0,0}, {1,1}}
+                                                         })),
+               framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT,
+                                                         PaddingMode::CONSTANT,
+                                                         PaddingMode::CONSTANT,
+                                                         PaddingMode::SYMMETRIC,
+                                                         PaddingMode::REFLECT,
+                                                         PaddingMode::REFLECT
+})),
+               framework::dataset::make("Expected", { false,
+                                                   false,
+                                                   true,
+                                                   false,
+                                                   true,
+                                                   false })),
+               input_info, output_info, padding, mode, expected)
 {
-    ARM_COMPUTE_EXPECT(bool(CLPadLayer::validate(&input_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true), padding)) == expected, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(bool(CLPadLayer::validate(&input_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true), padding, PixelValue(), mode)) == expected, framework::LogLevel::ERRORS);
 }
 
 // clang-format on
@@ -92,10 +104,9 @@
 TEST_SUITE(Float)
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunPadding, CLPaddingFixture<float>, framework::DatasetMode::ALL,
-                       combine(
-                           combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::F32 })),
-                           PaddingSizesDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPaddingFixture<float>, framework::DatasetMode::ALL,
+                       combine(combine(combine(datasets::Small3DShapes(), framework::dataset::make("DataType", { DataType::F32 })), PaddingSizesDataset),
+                               framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT, PaddingMode::SYMMETRIC })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -103,10 +114,9 @@
 TEST_SUITE_END() // FP32
 
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunPadding, CLPaddingFixture<half>, framework::DatasetMode::ALL,
-                       combine(
-                           combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::F16 })),
-                           PaddingSizesDataset))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPaddingFixture<half>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(datasets::Large3DShapes(), framework::dataset::make("DataType", { DataType::F16 })), PaddingSizesDataset),
+                               framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -114,25 +124,18 @@
 TEST_SUITE_END() // FP16
 TEST_SUITE_END() // Float
 
-TEST_SUITE(Integer)
-TEST_SUITE(S8)
-FIXTURE_DATA_TEST_CASE(RunPadding, CLPaddingFixture<int8_t>, framework::DatasetMode::ALL,
-                       combine(
-                           combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::S8 })),
-                           PaddingSizesDataset))
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPaddingFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(datasets::Small3DShapes(), framework::dataset::make("DataType", { DataType::QASYMM8 })), PaddingSizesDataset),
+                               framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-TEST_SUITE_END() // S8
-TEST_SUITE_END() // Integer
-
-TEST_SUITE(Quantized)
-TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunPadding, CLPaddingFixture<uint8_t>, framework::DatasetMode::ALL,
-                       combine(
-                           combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::QASYMM8 })),
-                           PaddingSizesDataset))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPaddingFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(datasets::Large3DShapes(), framework::dataset::make("DataType", { DataType::QASYMM8 })), PaddingSizesDataset),
+                               framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
diff --git a/tests/validation/CL/PixelWiseMultiplication.cpp b/tests/validation/CL/PixelWiseMultiplication.cpp
index b61ec39..03ce4c9 100644
--- a/tests/validation/CL/PixelWiseMultiplication.cpp
+++ b/tests/validation/CL/PixelWiseMultiplication.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -129,7 +129,7 @@
 FIXTURE_DATA_TEST_CASE(RunSmall, CLPixelWiseMultiplicationQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
                        framework::dataset::make("DataType", DataType::QASYMM8)),
                        framework::dataset::make("Scale", { 1.f, 2.f })),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
                        framework::dataset::make("RoundingPolicy", RoundingPolicy::TO_NEAREST_EVEN)),
                        framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })),
                        framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
diff --git a/tests/validation/CL/PoolingLayer.cpp b/tests/validation/CL/PoolingLayer.cpp
index 6afafe6..7d79f3f 100644
--- a/tests/validation/CL/PoolingLayer.cpp
+++ b/tests/validation/CL/PoolingLayer.cpp
@@ -74,6 +74,8 @@
 constexpr AbsoluteTolerance<float>   tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for 32-bit floating-point type */
 constexpr AbsoluteTolerance<float>   tolerance_f16(0.01f);  /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */
 constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);  /**< Tolerance value for comparing reference's output against implementation's output for 8-bit asymmetric type */
+const auto                           pool_data_layout_dataset = framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC });
+
 } // namespace
 
 TEST_SUITE(CL)
@@ -133,7 +135,7 @@
 FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFPSmall,
                                                                                                                   framework::dataset::make("DataType",
                                                                                                                           DataType::F32))),
-                                                                                                          framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                          pool_data_layout_dataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
@@ -141,7 +143,7 @@
 FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetFP,
                                                                                                                 framework::dataset::make("DataType",
                                                                                                                         DataType::F32))),
-                                                                                                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                        pool_data_layout_dataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
@@ -151,14 +153,14 @@
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFPSmall,
                                                                                                                  framework::dataset::make("DataType", DataType::F16))),
-                                                                                                         framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                         pool_data_layout_dataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetFP,
                                                                                                                framework::dataset::make("DataType", DataType::F16))),
-                                                                                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                       pool_data_layout_dataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16);
@@ -172,20 +174,16 @@
 using CLPoolingLayerQuantizedFixture = PoolingLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLPoolingLayer, T>;
 
 TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQASYMM8Small,
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQASYMM8Small,
                                                                                                                      framework::dataset::make("DataType", DataType::QASYMM8))),
-                                                                                                                     framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127),
-                                                                                                                             QuantizationInfo(7.f / 255, 123)
-                                                                                                                                                                  })),
-                                                                                                                     framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                                     pool_data_layout_dataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetQASYMM8,
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetQASYMM8,
                                                                                                                    framework::dataset::make("DataType", DataType::QASYMM8))),
-                                                                                                                   framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255, 0) })),
-                                                                                                                   framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                                   pool_data_layout_dataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
diff --git a/tests/validation/CL/QuantizationLayer.cpp b/tests/validation/CL/QuantizationLayer.cpp
index f0cc4cc..26e0304 100644
--- a/tests/validation/CL/QuantizationLayer.cpp
+++ b/tests/validation/CL/QuantizationLayer.cpp
@@ -53,21 +53,17 @@
 // *INDENT-OFF*
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
-               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::U8),  // Wrong input data type
-                                                       TensorInfo(TensorShape(16U, 5U, 16U), 1, DataType::U8),       // Invalid shape
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8),  // Wrong input data type
                                                        TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32), // Wrong output data type
-                                                       TensorInfo(TensorShape(16U, 16U, 2U, 5U), 1, DataType::U8),   // Mismatching shapes
-                                                       TensorInfo(TensorShape(17U, 16U, 16U, 5U), 1, DataType::U8),  // Shrink window
+                                                       TensorInfo(TensorShape(16U, 16U, 2U, 5U), 1, DataType::F32),   // Mismatching shapes
                                                        TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32), // Valid
                                                      }),
                framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(16U, 5U, 16U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::U16),
-                                                       TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(17U, 16U, 16U, 5U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8),
+                                                       TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8),
                                                      })),
-               framework::dataset::make("Expected", { false, false, false, false, false, true})),
+               framework::dataset::make("Expected", { false, false, false, true})),
                input_info, output_info, expected)
 {
     ARM_COMPUTE_EXPECT(bool(CLQuantizationLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
@@ -79,7 +75,7 @@
 {
     // Create tensors
     CLTensor src = create_tensor<CLTensor>(shape, data_type);
-    CLTensor dst = create_tensor<CLTensor>(shape, DataType::U8);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::QASYMM8);
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -94,9 +90,8 @@
     validate(dst.info()->valid_region(), valid_region);
 
     // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 4).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
+    validate(src.info()->padding(), PaddingSize());
+    validate(dst.info()->padding(), PaddingSize());
 }
 
 template <typename T>
@@ -104,19 +99,38 @@
 
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLQuantizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small3DShapes(), datasets::Small4DShapes()),
-                                                                                                               framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLQuantizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small3DShapes(), datasets::Small4DShapes()),
+                                                                                                                       framework::dataset::make("DataType", DataType::F32)),
+                                                                                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLQuantizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(concat(datasets::Large3DShapes(), datasets::Large4DShapes()),
-                                                                                                             framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLQuantizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(concat(datasets::Large3DShapes(), datasets::Large4DShapes()),
+                                                                                                                     framework::dataset::make("DataType", DataType::F32)),
+                                                                                                             framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
 TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLQuantizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small3DShapes(), datasets::Small4DShapes()),
+                                                                                                                      framework::dataset::make("DataType", DataType::F16)),
+                                                                                                              framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLQuantizationLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(concat(datasets::Large3DShapes(), datasets::Large4DShapes()),
+                                                                                                                    framework::dataset::make("DataType", DataType::F16)),
+                                                                                                            framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END() // FP16
 TEST_SUITE_END() // Float
 
 TEST_SUITE_END() // QuantizationLayer
diff --git a/tests/validation/CL/ReductionOperation.cpp b/tests/validation/CL/ReductionOperation.cpp
index c8474e9..79308c8 100644
--- a/tests/validation/CL/ReductionOperation.cpp
+++ b/tests/validation/CL/ReductionOperation.cpp
@@ -63,7 +63,7 @@
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
     framework::dataset::make("InputInfo",          { TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Mismatching data type input/output
-                                                     TensorInfo(TensorShape(128U, 64U), 2, DataType::F32), // Number of Input channels != 1
+                                                     TensorInfo(TensorShape(128U, 64U), 3, DataType::F32), // Number of Input channels != 1
                                                      TensorInfo(TensorShape(128U, 64U), 1, DataType::S16), // DataType != QASYMM8/F16/F32
                                                      TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis >= num_max_dimensions
                                                      TensorInfo(TensorShape(128U, 64U), 1, DataType::QASYMM8), // Axis == 0 and SUM_SQUARE and QASYMM8
diff --git a/tests/validation/CL/Split.cpp b/tests/validation/CL/Split.cpp
index 7736318..e038b89 100644
--- a/tests/validation/CL/Split.cpp
+++ b/tests/validation/CL/Split.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -57,6 +57,7 @@
 {
     std::vector<TensorInfo> outputs_info(splits);
     std::vector<ITensorInfo*> outputs_info_ptr;
+    outputs_info_ptr.reserve(splits);
     for(auto &output_info : outputs_info)
     {
         outputs_info_ptr.emplace_back(&output_info);
@@ -76,6 +77,7 @@
     CLTensor                 src = create_tensor<CLTensor>(shape, data_type);
     std::vector<CLTensor>    dsts(splits);
     std::vector<ICLTensor *> dsts_ptrs;
+    dsts_ptrs.reserve(splits);
     for(auto &dst : dsts)
     {
         dsts_ptrs.emplace_back(&dst);
diff --git a/tests/validation/CL/StackLayer.cpp b/tests/validation/CL/StackLayer.cpp
index b62c750..1715a5e 100644
--- a/tests/validation/CL/StackLayer.cpp
+++ b/tests/validation/CL/StackLayer.cpp
@@ -71,13 +71,13 @@
 const auto shapes_1d_large = combine(datasets::Large1DShapes(), framework::dataset::make("Axis", -1, 2));
 
 /** Shapes 2D to test */
-const auto shapes_2d_large = combine(datasets::Large2DShapes(), framework::dataset::make("Axis", -2, 3));
+const auto shapes_2d_large = combine(datasets::Medium2DShapes(), framework::dataset::make("Axis", -2, 3));
 
 /** Shapes 3D to test */
-const auto shapes_3d_large = combine(datasets::Large3DShapes(), framework::dataset::make("Axis", -3, 4));
+const auto shapes_3d_large = combine(datasets::Medium3DShapes(), framework::dataset::make("Axis", -3, 4));
 
 /** Shapes 4D to test */
-const auto shapes_4d_large = combine(datasets::Large4DShapes(), framework::dataset::make("Axis", -4, 5));
+const auto shapes_4d_large = combine(datasets::Medium4DShapes(), framework::dataset::make("Axis", -4, 5));
 } // namespace
 
 /** Fixture to use */
diff --git a/tests/validation/CL/UNIT/TensorAllocator.cpp b/tests/validation/CL/UNIT/TensorAllocator.cpp
index 849eee8..7e47e3d 100644
--- a/tests/validation/CL/UNIT/TensorAllocator.cpp
+++ b/tests/validation/CL/UNIT/TensorAllocator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,10 +25,16 @@
 
 #include "arm_compute/runtime/CL/CLMemoryGroup.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/Globals.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/reference/ActivationLayer.h"
 
 #include <memory>
+#include <random>
 
 namespace arm_compute
 {
@@ -36,14 +42,33 @@
 {
 namespace validation
 {
+namespace
+{
+cl_mem import_malloc_memory_helper(void *ptr, size_t size)
+{
+    const cl_import_properties_arm import_properties[] =
+    {
+        CL_IMPORT_TYPE_ARM,
+        CL_IMPORT_TYPE_HOST_ARM,
+        0
+    };
+
+    cl_int err = CL_SUCCESS;
+    cl_mem buf = clImportMemoryARM(CLKernelLibrary::get().context().get(), CL_MEM_READ_WRITE, import_properties, ptr, size, &err);
+    ARM_COMPUTE_ASSERT(err == CL_SUCCESS);
+
+    return buf;
+}
+} // namespace
+
 TEST_SUITE(CL)
 TEST_SUITE(UNIT)
 TEST_SUITE(TensorAllocator)
 
-TEST_CASE(ImportMemory, framework::DatasetMode::ALL)
+TEST_CASE(ImportMemoryBuffer, framework::DatasetMode::ALL)
 {
     // Init tensor info
-    TensorInfo info(TensorShape(24U, 16U, 3U), 1, DataType::F32);
+    const TensorInfo info(TensorShape(24U, 16U, 3U), 1, DataType::F32);
 
     // Allocate memory buffer
     const size_t total_size = info.total_size();
@@ -62,20 +87,88 @@
     ARM_COMPUTE_EXPECT(!bool(t2.allocator()->import_memory(buf)), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(t2.info()->is_resizable(), framework::LogLevel::ERRORS);
 
-    // Positive case : Set raw pointer
-    CLTensor t3;
-    t3.allocator()->init(info);
-    ARM_COMPUTE_EXPECT(bool(t3.allocator()->import_memory(buf)), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(!t3.info()->is_resizable(), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(t3.cl_buffer().get() == buf.get(), framework::LogLevel::ERRORS);
-    t3.allocator()->free();
+    // Negative case : Invalid buffer size
+    CLTensor         t3;
+    const TensorInfo info_neg(TensorShape(32U, 16U, 3U), 1, DataType::F32);
+    t3.allocator()->init(info_neg);
+    ARM_COMPUTE_EXPECT(!bool(t3.allocator()->import_memory(buf)), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(t3.info()->is_resizable(), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(t3.cl_buffer().get() != buf.get(), framework::LogLevel::ERRORS);
+
+    // Positive case : Set raw pointer
+    CLTensor t4;
+    t4.allocator()->init(info);
+    ARM_COMPUTE_EXPECT(bool(t4.allocator()->import_memory(buf)), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(!t4.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(t4.cl_buffer().get() == buf.get(), framework::LogLevel::ERRORS);
+    t4.allocator()->free();
+    ARM_COMPUTE_EXPECT(t4.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(t4.cl_buffer().get() != buf.get(), framework::LogLevel::ERRORS);
 }
 
-TEST_SUITE_END()
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_CASE(ImportMemoryMalloc, framework::DatasetMode::ALL)
+{
+    // Check if import extension is supported
+    if(!device_supports_extension(CLKernelLibrary::get().get_device(), "cl_arm_import_memory_host"))
+    {
+        return;
+    }
+    else
+    {
+        const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);
+        const TensorShape         shape     = TensorShape(24U, 16U, 3U);
+        const DataType            data_type = DataType::F32;
+
+        // Create tensor
+        const TensorInfo info(shape, 1, data_type);
+        CLTensor         tensor;
+        tensor.allocator()->init(info);
+
+        // Create and configure activation function
+        CLActivationLayer act_func;
+        act_func.configure(&tensor, nullptr, act_info);
+
+        // Allocate and import tensor
+        const size_t total_size_in_elems = tensor.info()->tensor_shape().total_size();
+        const size_t total_size_in_bytes = tensor.info()->total_size();
+        const size_t alignment           = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+        size_t       space               = total_size_in_bytes + alignment;
+        auto         raw_data            = support::cpp14::make_unique<uint8_t[]>(space);
+
+        void *aligned_ptr = raw_data.get();
+        support::cpp11::align(alignment, total_size_in_bytes, aligned_ptr, space);
+
+        cl::Buffer wrapped_buffer(import_malloc_memory_helper(aligned_ptr, total_size_in_bytes));
+        ARM_COMPUTE_EXPECT(bool(tensor.allocator()->import_memory(wrapped_buffer)), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensor
+        std::uniform_real_distribution<float> distribution(-5.f, 5.f);
+        std::mt19937                          gen(library->seed());
+        auto                                 *typed_ptr = reinterpret_cast<float *>(aligned_ptr);
+        for(unsigned int i = 0; i < total_size_in_elems; ++i)
+        {
+            typed_ptr[i] = distribution(gen);
+        }
+
+        // Execute function and sync
+        act_func.run();
+        CLScheduler::get().sync();
+
+        // Validate result by checking that the input has no negative values
+        for(unsigned int i = 0; i < total_size_in_elems; ++i)
+        {
+            ARM_COMPUTE_EXPECT(typed_ptr[i] >= 0, framework::LogLevel::ERRORS);
+        }
+
+        // Release resources
+        tensor.allocator()->free();
+        ARM_COMPUTE_EXPECT(tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+    }
+}
+
+TEST_SUITE_END() // TensorAllocator
+TEST_SUITE_END() // UNIT
+TEST_SUITE_END() // CL
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/CL/WidthConcatenateLayer.cpp b/tests/validation/CL/WidthConcatenateLayer.cpp
index 647e041..52a4e4c 100644
--- a/tests/validation/CL/WidthConcatenateLayer.cpp
+++ b/tests/validation/CL/WidthConcatenateLayer.cpp
@@ -24,14 +24,14 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLWidthConcatenateLayer.h"
+#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/datasets/ShapeDatasets.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
 #include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/WidthConcatenateLayerFixture.h"
+#include "tests/validation/fixtures/ConcatenateLayerFixture.h"
 
 namespace arm_compute
 {
@@ -47,19 +47,24 @@
               framework::dataset::make("InputInfo1", {  TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Mismatching data type input/output
                                                         TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Mismatching y dimension
                                                         TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Mismatching total width
-                                                        TensorInfo(TensorShape(16U, 27U, 5U), 1, DataType::F32)
+                                                        TensorInfo(TensorShape(16U, 27U, 5U), 1, DataType::F32),
+                                                        TensorInfo(TensorShape(21U, 35U, 5U), 1, DataType::F32)
+
               }),
               framework::dataset::make("InputInfo2", {  TensorInfo(TensorShape(24U, 27U, 4U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(52U, 27U, 5U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(52U, 27U, 5U), 1, DataType::F32),
-                                                        TensorInfo(TensorShape(16U, 27U, 5U), 1, DataType::F32)
+                                                        TensorInfo(TensorShape(16U, 27U, 5U), 1, DataType::F32),
+                                                        TensorInfo(TensorShape(10U, 35U, 5U), 1, DataType::F32)
               })),
               framework::dataset::make("OutputInfo", {  TensorInfo(TensorShape(47U, 27U, 5U), 1, DataType::F16),
                                                         TensorInfo(TensorShape(75U, 12U, 5U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(11U, 27U, 5U), 1, DataType::F32),
-                                                        TensorInfo(TensorShape(32U, 27U, 5U), 1, DataType::F32)
+                                                        TensorInfo(TensorShape(32U, 27U, 5U), 1, DataType::F32),
+                                                        TensorInfo(TensorShape(31U, 35U, 5U), 1, DataType::F32)
+
               })),
-              framework::dataset::make("Expected", { false, false, false, true })),
+              framework::dataset::make("Expected", { false, false, false, true, true })),
               input_info1, input_info2, output_info,expected)
 {
     std::vector<TensorInfo> inputs_vector_info;
@@ -67,13 +72,13 @@
     inputs_vector_info.emplace_back(std::move(input_info2));
 
     std::vector<ITensorInfo *> inputs_vector_info_raw;
+    inputs_vector_info_raw.reserve(inputs_vector_info.size());
     for(auto &input : inputs_vector_info)
     {
         inputs_vector_info_raw.emplace_back(&input);
     }
 
-    bool is_valid = bool(CLWidthConcatenateLayer::validate(inputs_vector_info_raw,
-                                                           &output_info.clone()->set_is_resizable(false)));
+    bool is_valid = bool(CLConcatenateLayer::validate(inputs_vector_info_raw,&output_info.clone()->set_is_resizable(true), 0));
     ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
@@ -93,26 +98,30 @@
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
     // Create and configure function
-    CLWidthConcatenateLayer concat_layer;
+    CLConcatenateLayer concat_layer;
 
-    concat_layer.configure({ &src1, &src2, &src3 }, &dst);
+    concat_layer.configure({ &src1, &src2, &src3 }, &dst, 0);
 }
 
 template <typename T>
-using CLWidthConcatenateLayerFixture = WidthConcatenateLayerValidationFixture<CLTensor, ICLTensor, CLAccessor, CLWidthConcatenateLayer, T>;
+using CLWidthConcatenateLayerFixture = ConcatenateLayerValidationFixture<CLTensor, ICLTensor, CLAccessor, CLConcatenateLayer, T>;
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWidthConcatenateLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWidthConcatenateLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
                                                                                                                   framework::dataset::make("DataType",
-                                                                                                                          DataType::F16)))
+                                                                                                                          DataType::F16)),
+                                                                                                                  framework::dataset::make("Axis", 0)))
+
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWidthConcatenateLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(concat(datasets::Large2DShapes(), datasets::Small4DShapes()),
-                                                                                                                framework::dataset::make("DataType",
-                                                                                                                        DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLWidthConcatenateLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(concat(datasets::Large2DShapes(), datasets::Small4DShapes()),
+                                                                                                                        framework::dataset::make("DataType",
+                                                                                                                                DataType::F16)),
+                                                                                                                framework::dataset::make("Axis", 0)))
+
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -120,15 +129,18 @@
 TEST_SUITE_END()
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWidthConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWidthConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
                                                                                                                    framework::dataset::make("DataType",
-                                                                                                                           DataType::F32)))
+                                                                                                                           DataType::F32)),
+                                                                                                                   framework::dataset::make("Axis", 0)))
+
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWidthConcatenateLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::WidthConcatenateLayerShapes(), framework::dataset::make("DataType",
-                                                                                                                 DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLWidthConcatenateLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::F32)),
+                                                                                                                 framework::dataset::make("Axis", 0)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -138,15 +150,17 @@
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWidthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWidthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
                                                                                                                      framework::dataset::make("DataType",
-                                                                                                                             DataType::QASYMM8)))
+                                                                                                                             DataType::QASYMM8)),
+                                                                                                                     framework::dataset::make("Axis", 0)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWidthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::WidthConcatenateLayerShapes(), framework::dataset::make("DataType",
-                                                                                                                   DataType::QASYMM8)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLWidthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::QASYMM8)),
+                                                                                                                   framework::dataset::make("Axis", 0)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
diff --git a/tests/validation/CL/Winograd.cpp b/tests/validation/CL/Winograd.cpp
index e744473..62f0335 100644
--- a/tests/validation/CL/Winograd.cpp
+++ b/tests/validation/CL/Winograd.cpp
@@ -54,7 +54,7 @@
 {
 // *INDENT-OFF*
 // clang-format off
-constexpr AbsoluteTolerance<float> tolerance_f32(0.001f);
+constexpr AbsoluteTolerance<float> tolerance_f32(0.002f);
 const AbsoluteTolerance<half> tolerance_f16(half(0.5f));
 constexpr AbsoluteTolerance<float> tolerance_convolution_layer_f32(0.1f);
 const AbsoluteTolerance<half> tolerance_convolution_layer_f16(half(0.4f));
@@ -81,6 +81,11 @@
                                                     framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x1_5x1(),
                                                                                datasets::SmallWinogradInputTransformDataset1x4_1x5())))));
 
+const auto SmallWinogradInputTransformDatasetNHWC_FP32 = framework::dataset::concat(SmallWinogradInputTransformDatasetNHWC,
+                                                         framework::dataset::concat(datasets::SmallWinogradInputTransformDataset1x2_1x7(),
+                                                         framework::dataset::concat(datasets::SmallWinogradInputTransformDataset2x1_7x1(),
+                                                                                    datasets::SmallWinogradInputTransformDataset2x2_7x7())));
+
 const auto LargeWinogradInputTransformDatasetNCHW =
            framework::dataset::concat(datasets::LargeWinogradInputTransformDataset2x2_3x3(),
            framework::dataset::concat(datasets::LargeWinogradInputTransformDataset2x1_3x1(),
@@ -98,6 +103,12 @@
            framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x1_5x1(),
                                       datasets::LargeWinogradInputTransformDataset1x4_1x5())));
 
+const auto LargeWinogradInputTransformDatasetNHWC_FP32 =
+           framework::dataset::concat(LargeWinogradInputTransformDatasetNHWC,
+           framework::dataset::concat(datasets::LargeWinogradInputTransformDataset1x2_1x7(),
+           framework::dataset::concat(datasets::LargeWinogradInputTransformDataset2x1_7x1(),
+                                     (datasets::LargeWinogradInputTransformDataset2x2_7x7()))));
+
 // Filter transform
 const auto SmallWinogradFilterTransformDatasetNCHW =
            framework::dataset::concat(combine(datasets::Small3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U), Size2D(4U, 4U) })),
@@ -107,13 +118,19 @@
            framework::dataset::concat(combine(datasets::Small5x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })),
                                       combine(datasets::Small1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })))))));
 
-const auto SmallWinogradFilterTransformDatasetNHWC =
+const auto SmallWinogradFilterTransformDatasetNHWC_F16 =
            framework::dataset::concat(combine(datasets::Small3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })),
            framework::dataset::concat(combine(datasets::Small3x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })),
            framework::dataset::concat(combine(datasets::Small1x3Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })),
            framework::dataset::concat(combine(datasets::Small5x5Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })),
            framework::dataset::concat(combine(datasets::Small5x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })),
-                                      combine(datasets::Small1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })))))));
+                                     (combine(datasets::Small1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) }))))))));
+
+const auto SmallWinogradFilterTransformDatasetNHWC_F32 =
+           framework::dataset::concat(SmallWinogradFilterTransformDatasetNHWC_F16,
+           framework::dataset::concat(combine(datasets::Small7x7Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U) })),
+           framework::dataset::concat(combine(datasets::Small7x1Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 1U) })),
+                                      combine(datasets::Small1x7Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 2U) })))));
 
 const auto LargeWinogradFilterTransformDatasetNCHW =
            framework::dataset::concat(combine(datasets::Large3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U), Size2D(4U, 4U) })),
@@ -123,7 +140,7 @@
            framework::dataset::concat(combine(datasets::Large5x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })),
                                       combine(datasets::Large1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })))))));
 
-const auto LargeWinogradFilterTransformDatasetNHWC =
+const auto LargeWinogradFilterTransformDatasetNHWC_F16 =
            framework::dataset::concat(combine(datasets::Large3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })),
            framework::dataset::concat(combine(datasets::Large3x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })),
            framework::dataset::concat(combine(datasets::Large1x3Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })),
@@ -131,14 +148,24 @@
            framework::dataset::concat(combine(datasets::Large5x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })),
                                       combine(datasets::Large1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })))))));
 
+const auto LargeWinogradFilterTransformDatasetNHWC_F32 =
+           framework::dataset::concat(LargeWinogradFilterTransformDatasetNHWC_F16,
+           framework::dataset::concat(combine(datasets::Large7x7Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U) })),
+           framework::dataset::concat(combine(datasets::Large7x1Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 1U) })),
+                                      combine(datasets::Large1x7Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 2U) })))));
+
 // Output transform
 const auto SmallWinogradOutputTransformDatasetNCHW = datasets::SmallWinogradOutputTransformDatasetNCHW();
 
-const auto SmallWinogradOutputTransformDatasetNHWC = datasets::SmallWinogradOutputTransformDatasetNHWC();
+const auto SmallWinogradOutputTransformDatasetNHWC_F16 = datasets::SmallWinogradOutputTransformDatasetNHWC_F16();
+
+const auto SmallWinogradOutputTransformDatasetNHWC_F32 = datasets::SmallWinogradOutputTransformDatasetNHWC_F32();
 
 const auto LargeWinogradOutputTransformDatasetNCHW = datasets::LargeWinogradOutputTransformDatasetNCHW();
 
-const auto LargeWinogradOutputTransformDatasetNHWC = datasets::LargeWinogradOutputTransformDatasetNHWC();
+const auto LargeWinogradOutputTransformDatasetNHWC_F16 = datasets::LargeWinogradOutputTransformDatasetNHWC_F16();
+
+const auto LargeWinogradOutputTransformDatasetNHWC_F32 = datasets::LargeWinogradOutputTransformDatasetNHWC_F32();
 
 //Activation Functions
 const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
@@ -252,14 +279,14 @@
 }
 TEST_SUITE_END() // FP16
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradInputTransformFixtureFP32, framework::DatasetMode::PRECOMMIT, combine(combine(SmallWinogradInputTransformDatasetNHWC,
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradInputTransformFixtureFP32, framework::DatasetMode::PRECOMMIT, combine(combine(SmallWinogradInputTransformDatasetNHWC_FP32,
                                                                                                                      framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                                                                                                                      framework::dataset::make("DataType", { DataType::F32 })))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradInputTransformFixtureFP32, framework::DatasetMode::NIGHTLY, combine(combine(LargeWinogradInputTransformDatasetNHWC,
+FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradInputTransformFixtureFP32, framework::DatasetMode::NIGHTLY, combine(combine(LargeWinogradInputTransformDatasetNHWC_FP32,
                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                                                                                                                    framework::dataset::make("DataType", { DataType::F32 })))
 {
@@ -352,7 +379,7 @@
 TEST_SUITE(NHWC)
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradFilterTransformFixtureFP16, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(SmallWinogradFilterTransformDatasetNHWC,
+                       combine(combine(SmallWinogradFilterTransformDatasetNHWC_F16,
                                        framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                                        framework::dataset::make("DataType", { DataType::F16 })))
 {
@@ -361,7 +388,7 @@
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradFilterTransformFixtureFP16, framework::DatasetMode::NIGHTLY,
-                       combine(combine(LargeWinogradFilterTransformDatasetNHWC,
+                       combine(combine(LargeWinogradFilterTransformDatasetNHWC_F16,
                                        framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                                        framework::dataset::make("DataType", { DataType::F16 })))
 {
@@ -371,7 +398,7 @@
 TEST_SUITE_END() // FP16
 TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradFilterTransformFixtureFP32, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(SmallWinogradFilterTransformDatasetNHWC,
+                       combine(combine(SmallWinogradFilterTransformDatasetNHWC_F32,
                                        framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                                        framework::dataset::make("DataType", { DataType::F32 })))
 {
@@ -380,7 +407,7 @@
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradFilterTransformFixtureFP32, framework::DatasetMode::NIGHTLY,
-                       combine(combine(LargeWinogradFilterTransformDatasetNHWC,
+                       combine(combine(LargeWinogradFilterTransformDatasetNHWC_F32,
                                        framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                                        framework::dataset::make("DataType", { DataType::F32 })))
 {
@@ -491,7 +518,7 @@
 TEST_SUITE(NHWC)
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradOutputTransformFixtureFP16, framework::DatasetMode::ALL,
-                       combine(combine(SmallWinogradOutputTransformDatasetNHWC,
+                       combine(combine(SmallWinogradOutputTransformDatasetNHWC_F16,
                                framework::dataset::make("DataType", { DataType::F16 })),
                                framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) ))
 {
@@ -500,7 +527,7 @@
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradOutputTransformFixtureFP16, framework::DatasetMode::NIGHTLY,
-                       combine(combine(LargeWinogradOutputTransformDatasetNHWC,
+                       combine(combine(LargeWinogradOutputTransformDatasetNHWC_F16,
                                framework::dataset::make("DataType", { DataType::F16 })),
                                framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) ))
 {
@@ -510,7 +537,7 @@
 TEST_SUITE_END() // FP16
 TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradOutputTransformFixtureFP32, framework::DatasetMode::ALL,
-                       combine(combine(SmallWinogradOutputTransformDatasetNHWC,
+                       combine(combine(SmallWinogradOutputTransformDatasetNHWC_F32,
                                framework::dataset::make("DataType", { DataType::F32 })),
                                framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) ))
 {
@@ -519,7 +546,7 @@
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradOutputTransformFixtureFP32, framework::DatasetMode::NIGHTLY,
-                       combine(combine(LargeWinogradOutputTransformDatasetNHWC,
+                       combine(combine(LargeWinogradOutputTransformDatasetNHWC_F32,
                                framework::dataset::make("DataType", { DataType::F32 })),
                                framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) ))
 {
diff --git a/tests/validation/CPP/DFT.cpp b/tests/validation/CPP/DFT.cpp
new file mode 100644
index 0000000..8f1b823
--- /dev/null
+++ b/tests/validation/CPP/DFT.cpp
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/SimpleTensor.h"
+#include "tests/SimpleTensorAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+
+#include "tests/validation/Validation.h"
+#include "tests/validation/reference/ConvolutionLayer.h"
+#include "tests/validation/reference/DFT.h"
+
+#include <random>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+auto shapes_1d_dft = framework::dataset::make("TensorShape", { TensorShape(33U),
+                                                               TensorShape(8U),
+                                                               TensorShape(23U, 7U),
+                                                               TensorShape(16U, 8U, 4U)
+                                                             });
+
+auto shapes_2d_dft = framework::dataset::make("TensorShape", { TensorShape(33U, 14U),
+                                                               TensorShape(8U, 9U),
+                                                               TensorShape(23U, 7U, 3U),
+                                                               TensorShape(16U, 8U, 4U)
+                                                             });
+
+auto conv_dataset_dft = framework::dataset::zip(framework::dataset::zip(framework::dataset::make("InputShape", { TensorShape(8U, 7U, 3U, 2U),
+                                                                                                                 TensorShape(18U, 22U, 4U),
+                                                                                                                 TensorShape(32U, 48U, 8U)
+                                                                                                               }),
+                                                                        framework::dataset::make("WeightShape", { TensorShape(3U, 3U, 3U, 6U),
+                                                                                                                  TensorShape(5U, 5U, 4U, 3U),
+                                                                                                                  TensorShape(9U, 9U, 8U, 3U)
+                                                                                                                })),
+                                                framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 1, 1),
+                                                                                       PadStrideInfo(1, 1, 2, 2),
+                                                                                       PadStrideInfo(1, 1, 4, 4)
+                                                                                     }));
+} // namespace
+TEST_SUITE(CPP)
+TEST_SUITE(DFT)
+
+TEST_SUITE(DFT1D)
+DATA_TEST_CASE(Real, framework::DatasetMode::ALL, shapes_1d_dft,
+               shape)
+{
+    SimpleTensor<float>                   src{ shape, DataType::F32, 1 };
+    std::uniform_real_distribution<float> distribution(-5.f, 5.f);
+    library->fill(src, distribution, 0);
+
+    const bool is_odd = shape.x() % 2;
+
+    // Forward pass
+    auto forward = reference::rdft_1d(src);
+    // Backward pass
+    auto backward = reference::ridft_1d(forward, is_odd);
+
+    // Validate with input
+    validate(SimpleTensorAccessor<float>(src), backward, RelativeTolerance<float>(0.1f));
+}
+
+DATA_TEST_CASE(Complex, framework::DatasetMode::ALL, shapes_1d_dft,
+               shape)
+{
+    SimpleTensor<float>                   src{ shape, DataType::F32, 2 };
+    std::uniform_real_distribution<float> distribution(-5.f, 5.f);
+    library->fill(src, distribution, 0);
+
+    // Forward pass
+    auto forward = reference::dft_1d(src, reference::FFTDirection::Forward);
+    // Backward pass
+    auto backward = reference::dft_1d(forward, reference::FFTDirection::Inverse);
+
+    // Validate with input
+    validate(SimpleTensorAccessor<float>(src), backward, RelativeTolerance<float>(0.1f));
+}
+TEST_SUITE_END() // DFT1D
+
+TEST_SUITE(DFT2D)
+DATA_TEST_CASE(Real, framework::DatasetMode::ALL, shapes_2d_dft,
+               shape)
+{
+    SimpleTensor<float>                   src{ shape, DataType::F32, 1 };
+    std::uniform_real_distribution<float> distribution(-5.f, 5.f);
+    library->fill(src, distribution, 0);
+
+    const bool is_odd = shape.x() % 2;
+
+    // Forward pass
+    auto forward = reference::rdft_2d(src);
+    // Backward pass
+    auto backward = reference::ridft_2d(forward, is_odd);
+
+    // Validate with input
+    validate(SimpleTensorAccessor<float>(src), backward, RelativeTolerance<float>(0.1f));
+}
+
+DATA_TEST_CASE(Complex, framework::DatasetMode::ALL, shapes_2d_dft,
+               shape)
+{
+    SimpleTensor<float>                   src{ shape, DataType::F32, 2 };
+    std::uniform_real_distribution<float> distribution(-5.f, 5.f);
+    library->fill(src, distribution, 0);
+
+    // Forward pass
+    auto forward = reference::dft_2d(src, reference::FFTDirection::Forward);
+    // Backward pass
+    auto backward = reference::dft_2d(forward, reference::FFTDirection::Inverse);
+
+    // Validate with input
+    validate(SimpleTensorAccessor<float>(src), backward, RelativeTolerance<float>(0.1f));
+}
+TEST_SUITE_END() // DFT2D
+
+TEST_SUITE(Conv)
+DATA_TEST_CASE(Real2Real, framework::DatasetMode::ALL, conv_dataset_dft,
+               shape_in, shape_w, conv_info)
+{
+    std::uniform_real_distribution<float> distribution(-1.f, 1.f);
+    std::uniform_real_distribution<float> distribution_b(0.f, 0.f);
+
+    SimpleTensor<float> src{ shape_in, DataType::F32, 1 };
+    SimpleTensor<float> w{ shape_w, DataType::F32, 1 };
+    SimpleTensor<float> b{ TensorShape(shape_w[3]), DataType::F32, 1 };
+
+    library->fill(src, distribution, 0);
+    library->fill(w, distribution, 1);
+    library->fill(b, distribution_b, 2);
+
+    const auto  output_wh = arm_compute::scaled_dimensions(shape_in.x(), shape_in.y(), shape_w.x(), shape_w.y(), conv_info);
+    TensorShape dst_shape = shape_in;
+    dst_shape.set(0, output_wh.first);
+    dst_shape.set(1, output_wh.second);
+    dst_shape.set(2, shape_w[3]);
+
+    // FFT based convolution
+    auto dst = reference::conv2d_dft(src, w, conv_info);
+    // Reference convolution
+    auto dst_ref = reference::convolution_layer(src, w, b, dst_shape, conv_info);
+
+    // Validate with input
+    validate(SimpleTensorAccessor<float>(dst), dst_ref, RelativeTolerance<float>(0.1f), 0.f, AbsoluteTolerance<float>(0.001f));
+}
+TEST_SUITE_END() // Conv
+
+TEST_SUITE_END() // DFT
+TEST_SUITE_END() // CPP
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp b/tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp
index 7af3050..04e91d6 100644
--- a/tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp
+++ b/tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,14 +24,14 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
 #include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
-#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCConcatenateLayer.h"
 #include "tests/GLES_COMPUTE/GCAccessor.h"
 #include "tests/datasets/ShapeDatasets.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
 #include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/DepthConcatenateLayerFixture.h"
+#include "tests/validation/fixtures/ConcatenateLayerFixture.h"
 
 namespace arm_compute
 {
@@ -42,21 +42,23 @@
 TEST_SUITE(GC)
 TEST_SUITE(DepthConcatenateLayer)
 
-//TODO(COMPMID-415): Add configuration test?
-
 template <typename T>
-using GCDepthConcatenateLayerFixture = DepthConcatenateLayerValidationFixture<GCTensor, IGCTensor, GCAccessor, GCDepthConcatenateLayer, T>;
+using GCDepthConcatenateLayerFixture = ConcatenateLayerValidationFixture<GCTensor, IGCTensor, GCAccessor, GCConcatenateLayer, T>;
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthConcatenateLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
-                                                                                                                  DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthConcatenateLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::Small3DShapes(),
+                                                                                                                  framework::dataset::make("DataType",
+                                                                                                                          DataType::F16)),
+                                                                                                                  framework::dataset::make("Axis", 2)))
 {
     // Validate output
     validate(GCAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthConcatenateLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
-                                                                                                                DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthConcatenateLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large3DShapes(),
+                                                                                                                        framework::dataset::make("DataType",
+                                                                                                                                DataType::F16)),
+                                                                                                                framework::dataset::make("Axis", 2)))
 {
     // Validate output
     validate(GCAccessor(_target), _reference);
@@ -64,14 +66,17 @@
 TEST_SUITE_END()
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
-                                                                                                                   DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::Small3DShapes(),
+                                                                                                                   framework::dataset::make("DataType",
+                                                                                                                           DataType::F32)),
+                                                                                                                   framework::dataset::make("Axis", 2)))
 {
     // Validate output
     validate(GCAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthConcatenateLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType",
-                                                                                                                 DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthConcatenateLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large3DShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::F32)),
+                                                                                                                 framework::dataset::make("Axis", 2)))
 {
     // Validate output
     validate(GCAccessor(_target), _reference);
diff --git a/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp b/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp
index 22b1e08..c31cae3 100644
--- a/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -18,7 +18,7 @@
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONCLCTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
 #include "arm_compute/core/Types.h"
@@ -46,6 +46,10 @@
 constexpr float         tolerance_num = 0.07f;     /**< Tolerance number */
 
 const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 3 });
+
+//Activation Functions
+const auto ActivationFunctionsEmptyDataset = framework::dataset::make("ActivationInfo",
+{ ActivationLayerInfo() });
 } // namespace
 
 TEST_SUITE(GC)
@@ -57,19 +61,21 @@
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
 TEST_SUITE(W3x3)
-FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
                                                                                                                    depth_multipliers),
                                                                                                                    framework::dataset::make("DataType",
                                                                                                                            DataType::F16)),
-                                                                                                                   framework::dataset::make("DataLayout", DataLayout::NCHW)))
+                                                                                                                   framework::dataset::make("DataLayout", DataLayout::NCHW)),
+                                                                                                                   ActivationFunctionsEmptyDataset))
 {
     validate(GCAccessor(_target), _reference, tolerance_fp16, tolerance_num);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
                                                                                                                        depth_multipliers),
                                                                                                                        framework::dataset::make("DataType",
                                                                                                                                DataType::F16)),
-                                                                                                                       framework::dataset::make("DataLayout", DataLayout::NCHW)))
+                                                                                                                       framework::dataset::make("DataLayout", DataLayout::NCHW)),
+                                                                                                                       ActivationFunctionsEmptyDataset))
 {
     validate(GCAccessor(_target), _reference, tolerance_fp16, tolerance_num);
 }
diff --git a/tests/validation/Helpers.cpp b/tests/validation/Helpers.cpp
index 11c454e..71a674b 100644
--- a/tests/validation/Helpers.cpp
+++ b/tests/validation/Helpers.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -74,45 +74,6 @@
     }
 }
 
-TensorShape calculate_depth_concatenate_shape(const std::vector<TensorShape> &input_shapes)
-{
-    ARM_COMPUTE_ERROR_ON(input_shapes.empty());
-
-    TensorShape out_shape = input_shapes[0];
-
-    size_t max_x = 0;
-    size_t max_y = 0;
-    size_t depth = 0;
-
-    for(const auto &shape : input_shapes)
-    {
-        max_x = std::max(shape.x(), max_x);
-        max_y = std::max(shape.y(), max_y);
-        depth += shape.z();
-    }
-
-    out_shape.set(0, max_x);
-    out_shape.set(1, max_y);
-    out_shape.set(2, depth);
-
-    return out_shape;
-}
-
-TensorShape calculate_width_concatenate_shape(const std::vector<TensorShape> &input_shapes)
-{
-    ARM_COMPUTE_ERROR_ON(input_shapes.empty());
-
-    TensorShape out_shape = input_shapes[0];
-
-    int width = std::accumulate(input_shapes.begin(), input_shapes.end(), 0, [](int sum, const TensorShape & shape)
-    {
-        return sum + shape.x();
-    });
-    out_shape.set(0, width);
-
-    return out_shape;
-}
-
 HarrisCornersParameters harris_corners_parameters()
 {
     HarrisCornersParameters params;
@@ -308,7 +269,7 @@
 
     const int min_bound = quant_info.quantize(min, RoundingPolicy::TO_NEAREST_UP);
     const int max_bound = quant_info.quantize(max, RoundingPolicy::TO_NEAREST_UP);
-    return std::pair<int, int>(min_bound, max_bound);
+    return std::pair<int, int> { min_bound, max_bound };
 }
 
 template void get_tile(const SimpleTensor<float> &in, SimpleTensor<float> &roi, const Coordinates &coord);
diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h
index 4d1d214..2e8c667 100644
--- a/tests/validation/Helpers.h
+++ b/tests/validation/Helpers.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -128,13 +128,14 @@
  */
 TensorShape calculate_depth_concatenate_shape(const std::vector<TensorShape> &input_shapes);
 
-/** Calculate output tensor shape give a vector of input tensor to concatenate
+/** Calculate output tensor shape for the concatenate operation along a given axis
  *
  * @param[in] input_shapes Shapes of the tensors to concatenate across width.
+ * @param[in] axis         Axis to use for the concatenate operation
  *
  * @return The shape of output concatenated tensor.
  */
-TensorShape calculate_width_concatenate_shape(const std::vector<TensorShape> &input_shapes);
+TensorShape calculate_concatenate_shape(const std::vector<TensorShape> &input_shapes, size_t axis);
 
 /** Parameters of Harris Corners algorithm. */
 struct HarrisCornersParameters
diff --git a/tests/validation/NEON/ArithmeticAddition.cpp b/tests/validation/NEON/ArithmeticAddition.cpp
index bad0b33..4a72dfc 100644
--- a/tests/validation/NEON/ArithmeticAddition.cpp
+++ b/tests/validation/NEON/ArithmeticAddition.cpp
@@ -248,7 +248,7 @@
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
                shape, policy)
 {
     // Create tensors
@@ -274,7 +274,7 @@
                        NEArithmeticAdditionQuantizedFixture<uint8_t>,
                        framework::DatasetMode::PRECOMMIT,
                        combine(combine(combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionQASYMM8Dataset),
-                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
                                                framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })),
                                        framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
                                framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) })))
@@ -282,7 +282,7 @@
     // Validate output
 #ifdef __aarch64__
     validate(Accessor(_target), _reference);
-#else //__aarch64__
+#else  //__aarch64__
     validate(Accessor(_target), _reference, tolerance_qasymm8);
 #endif //__aarch64__
 }
diff --git a/tests/validation/NEON/ArithmeticSubtraction.cpp b/tests/validation/NEON/ArithmeticSubtraction.cpp
index 83ebf34..650738c 100644
--- a/tests/validation/NEON/ArithmeticSubtraction.cpp
+++ b/tests/validation/NEON/ArithmeticSubtraction.cpp
@@ -43,18 +43,37 @@
 {
 namespace
 {
+
+#ifdef __aarch64__
+constexpr AbsoluteTolerance<float> tolerance_qasymm8(0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
+#else  //__aarch64__
+constexpr AbsoluteTolerance<float> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
+#endif //__aarch64__
+
 /** Input data sets **/
-const auto ArithmeticSubtractionU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8)),
-                                                    framework::dataset::make("DataType",
-                                                                             DataType::U8));
-const auto ArithmeticSubtractionS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
+const auto ArithmeticSubtractionQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8),
+                                                                 framework::dataset::make("DataType", DataType::QASYMM8)),
+                                                         framework::dataset::make("DataType", DataType::QASYMM8));
+
+const auto ArithmeticSubtractionU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8),
+                                                            framework::dataset::make("DataType", DataType::U8)),
+                                                    framework::dataset::make("DataType", DataType::U8));
+
+const auto ArithmeticSubtractionS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }),
+                                                             framework::dataset::make("DataType", DataType::S16)),
                                                      framework::dataset::make("DataType", DataType::S16));
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-const auto ArithmeticSubtractionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
+const auto ArithmeticSubtractionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16),
+                                                              framework::dataset::make("DataType", DataType::F16)),
                                                       framework::dataset::make("DataType", DataType::F16));
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-const auto ArithmeticSubtractionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)),
+const auto ArithmeticSubtractionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32),
+                                                              framework::dataset::make("DataType", DataType::F32)),
                                                       framework::dataset::make("DataType", DataType::F32));
+
+const auto ArithmeticSubtractionQuantizationInfoDataset = combine(combine(framework::dataset::make("QuantizationInfoIn1", { QuantizationInfo(10, 120) }),
+                                                                          framework::dataset::make("QuantizationInfoIn2", { QuantizationInfo(20, 110) })),
+                                                                  framework::dataset::make("QuantizationInfoOut", { QuantizationInfo(15, 125) }));
 } // namespace
 
 TEST_SUITE(NEON)
@@ -65,29 +84,41 @@
 
 // *INDENT-OFF*
 // clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
         framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                  TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                  TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),      // Window shrink
                                                  TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid data type combination
                                                  TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
+                                                 TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::QASYMM8), // Mismatching types
+                                                 TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Invalid convert policy
         }),
         framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                 TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                 TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
                                                 TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
                                                 TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
+                                                TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
+                                                TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8),
         })),
         framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
                                                 TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                 TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
                                                 TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                 TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
+                                                TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8),
         })),
-        framework::dataset::make("Expected", { true, true, false, false, false})),
-        input1_info, input2_info, output_info, expected)
+        framework::dataset::make("ConvertPolicy",{ ConvertPolicy::WRAP,
+                                                ConvertPolicy::SATURATE,
+                                                ConvertPolicy::WRAP,
+                                                ConvertPolicy::SATURATE,
+                                                ConvertPolicy::WRAP,
+                                                ConvertPolicy::WRAP,
+        })),
+        framework::dataset::make("Expected", { true, true, false, false, false, false, false})),
+        input1_info, input2_info, output_info, policy, expected)
 {
-    ARM_COMPUTE_EXPECT(bool(NEArithmeticSubtraction::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), ConvertPolicy::WRAP)) == expected, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(bool(NEArithmeticSubtraction::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), policy)) == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
 // *INDENT-ON*
@@ -124,6 +155,45 @@
 }
 TEST_SUITE_END() // U8
 
+using NEArithmeticSubtractionQuantFixture = ArithmeticSubtractionQuantValidationFixture<Tensor, Accessor, NEArithmeticSubtraction>;
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+               shape, policy)
+{
+    // Create tensors
+    Tensor ref_src1 = create_tensor<Tensor>(shape, DataType::QASYMM8);
+    Tensor ref_src2 = create_tensor<Tensor>(shape, DataType::QASYMM8);
+    Tensor dst      = create_tensor<Tensor>(shape, DataType::QASYMM8);
+
+    // Create and Configure function
+    NEArithmeticSubtraction sub;
+    sub.configure(&ref_src1, &ref_src2, &dst, policy);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(ref_src1.info()->padding(), padding);
+    validate(ref_src2.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionQuantFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(
+                                                                                                                     datasets::SmallShapes(),
+                                                                                                                     ArithmeticSubtractionQASYMM8Dataset),
+                                                                                                                 framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+                                                                                                                 ArithmeticSubtractionQuantizationInfoDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // Quantized
+
 TEST_SUITE(S16)
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
                                                                    framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
diff --git a/tests/validation/NEON/BatchToSpaceLayer.cpp b/tests/validation/NEON/BatchToSpaceLayer.cpp
new file mode 100644
index 0000000..b5d684a
--- /dev/null
+++ b/tests/validation/NEON/BatchToSpaceLayer.cpp
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/BatchToSpaceDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/BatchToSpaceLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(NEON)
+TEST_SUITE(BatchToSpaceLayer)
+
+template <typename T>
+using NEBatchToSpaceLayerFixture = BatchToSpaceLayerValidationFixture<Tensor, Accessor, NEBatchToSpaceLayer, T>;
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32),    // blockx != blocky && blockx > blocky
+                                                       TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32),    // blockx != blocky && blocky > blockx
+                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),     // Mismatching data types
+                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),     // Wrong data type block shape
+                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U, 4U), 1, DataType::F32), // Wrong tensor shape
+                                                     }),
+               framework::dataset::make("BlockShapeInfo",{ TensorInfo(TensorShape(2U, 2U), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(2U, 2U), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(2U, 4U), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(4U, 2U), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(2U, 2U), 1, DataType::F16),
+                                                       TensorInfo(TensorShape(2U, 2U), 1, DataType::S32),
+                                                     })),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(64U, 16U, 2U, 1U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 32U, 2U, 1U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F16),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
+                                                     })),
+               framework::dataset::make("Expected", { true, true, true, false, false, false})),
+               input_info, block_shape_info, output_info, expected)
+{
+    bool has_error = bool(NEBatchToSpaceLayer::validate(&input_info.clone()->set_is_resizable(false), &block_shape_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false)));
+    ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS);
+}
+DATA_TEST_CASE(ValidateStatic, framework::DatasetMode::ALL, zip(zip(zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32),    // blockx != blocky && blockx > blocky
+                                                       TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32),    // blockx != blocky && blocky > blockx
+                                                       TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32),    // Mismatching data types
+                                                       TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32),    // Negative block shapes
+                                                       TensorInfo(TensorShape(32U, 16U, 2U, 4U, 4U), 1, DataType::F32), // Wrong tensor shape
+                                                     }),
+               framework::dataset::make("BlockShapeX", { 2, 4, 2, 2, 2, 2 })),
+               framework::dataset::make("BlockShapeY", { 2, 2, 4, 2, -2, 2 })),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(64U, 16U, 2U, 1U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 32U, 2U, 1U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F16),
+                                                       TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 8U, 2U, 1U), 1, DataType::F32),
+                                                     })),
+               framework::dataset::make("Expected", { true, true, true, false, false, false})),
+               input_info, block_shape_x, block_shape_y, output_info, expected)
+{
+    bool has_error = bool(NEBatchToSpaceLayer::validate(&input_info.clone()->set_is_resizable(false), block_shape_x, block_shape_y, &output_info.clone()->set_is_resizable(false)));
+    ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchToSpaceLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallBatchToSpaceLayerDataset(), framework::dataset::make("DataType",
+                                                                                                                       DataType::F32)),
+                                                                                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEBatchToSpaceLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeBatchToSpaceLayerDataset(), framework::dataset::make("DataType",
+                                                                                                                     DataType::F32)),
+                                                                                                             framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchToSpaceLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallBatchToSpaceLayerDataset(), framework::dataset::make("DataType",
+                                                                                                                      DataType::F16)),
+                                                                                                              framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEBatchToSpaceLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeBatchToSpaceLayerDataset(), framework::dataset::make("DataType",
+                                                                                                                    DataType::F16)),
+                                                                                                            framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END() // BatchToSpace
+TEST_SUITE_END() // NEON
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/Convolution.cpp b/tests/validation/NEON/Convolution.cpp
index 3a9f29c..b942ddc 100644
--- a/tests/validation/NEON/Convolution.cpp
+++ b/tests/validation/NEON/Convolution.cpp
@@ -66,14 +66,14 @@
     Tensor dst = create_tensor<Tensor>(shape, output_data_type);
 
     // Create conv matrix
-    int16_t conv[9] = {};
+    std::array<int16_t, 9> conv = { 0 };
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
     // Create and configure function
     NEConvolution3x3 convolution;
-    convolution.configure(&src, &dst, conv, 0, border_mode);
+    convolution.configure(&src, &dst, conv.data(), 0, border_mode);
 
     // Validate valid region
     const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), BorderSize(filter_size / 2));
@@ -134,14 +134,14 @@
     Tensor dst = create_tensor<Tensor>(shape, output_data_type);
 
     // Create conv matrix
-    int16_t conv[25] = {};
+    std::array<int16_t, 25> conv = { 0 };
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
     // Create and configure function
     NEConvolution5x5 convolution;
-    convolution.configure(&src, &dst, conv, 0, border_mode);
+    convolution.configure(&src, &dst, conv.data(), 0, border_mode);
 
     // Validate valid region
     const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), BorderSize(filter_size / 2));
@@ -202,14 +202,14 @@
     Tensor dst = create_tensor<Tensor>(shape, output_data_type);
 
     // Create conv matrix
-    int16_t conv[49] = {};
+    std::array<int16_t, 49> conv = { 0 };
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
     // Create and configure function
     NEConvolution7x7 convolution;
-    convolution.configure(&src, &dst, conv, 0, border_mode);
+    convolution.configure(&src, &dst, conv.data(), 0, border_mode);
 
     // Validate valid region
     const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), BorderSize(filter_size / 2));
@@ -270,14 +270,14 @@
     Tensor dst = create_tensor<Tensor>(shape, output_data_type);
 
     // Create conv matrix
-    int16_t conv[81] = {};
+    std::array<int16_t, 81> conv = { 0 };
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
     // Create and configure function
     NEConvolution9x9 convolution;
-    convolution.configure(&src, &dst, conv, 0, border_mode);
+    convolution.configure(&src, &dst, conv.data(), 0, border_mode);
 
     // Validate valid region
     const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), BorderSize(filter_size / 2));
diff --git a/tests/validation/NEON/CropResize.cpp b/tests/validation/NEON/CropResize.cpp
new file mode 100644
index 0000000..1feed3d
--- /dev/null
+++ b/tests/validation/NEON/CropResize.cpp
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NECropResize.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include "tests/NEON/Accessor.h"
+#include "tests/datasets/CropResizeDataset.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/CropResizeFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(NEON)
+TEST_SUITE(CropResize)
+
+RelativeTolerance<float> tolerance_fp32(0.001f);
+
+template <typename T>
+using NECropResizeFixture = CropResizeFixture<Tensor, Accessor, NECropResize, T>;
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::U8),  // Invalid input data type.
+                                                       TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::S32), // Invalid box_ind shape.
+                                                       TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::S32), // Invalid output shape.
+                                                       TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::S32), // Invalid output data type.
+                                                       TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::S32), // Invalid output shape.
+                                                       TensorInfo(TensorShape(15U, 30U, 40U, 10U), 1, DataType::S32), // Invalid boxes shape.
+                                                     }),
+               framework::dataset::make("BoxesInfo",{  TensorInfo(TensorShape(4, 20), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(4, 20), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(4, 20), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(4, 20), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(4, 20), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(4, 20), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(3, 20), 1, DataType::F32),
+                                                     })),
+               framework::dataset::make("BoxIndInfo",{ TensorInfo(TensorShape(20), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(20), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(10), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(20), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(20), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(20), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(20), 1, DataType::S32),
+                                                     })),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(15U, 5, 5, 20U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(15U, 5, 5, 20U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(15U, 5, 5, 20U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(15U, 5, 5, 10U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(15U, 5, 5, 20U), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(5U, 5, 5, 20U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(15U, 5, 5, 20U), 1, DataType::F32),
+                                                     })),
+               framework::dataset::make("Expected", { true, false, false, false, false, false, false})),
+               input, boxes, box_ind, output, expected)
+{
+    ARM_COMPUTE_EXPECT(bool(NECropResize::validate(&input.clone()->set_data_layout(DataLayout::NHWC).set_is_resizable(false),
+                                                   &boxes.clone()->set_is_resizable(false),
+                                                   &box_ind.clone()->set_is_resizable(false),
+                                                   &output.clone()->set_data_layout(DataLayout::NHWC).set_is_resizable(false),
+                                                   Coordinates2D{ 5, 5 }, InterpolationPolicy::BILINEAR, 100)) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+TEST_SUITE(Float)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+TEST_SUITE(F16)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       NECropResizeFixture<half>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallCropResizeDataset(),
+                               combine(framework::dataset::make("IsOutOfBounds", { true, false }),
+                                       framework::dataset::make("DataType", DataType::F16))))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
+}
+TEST_SUITE_END() // F16
+#endif           /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
+TEST_SUITE(F32)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       NECropResizeFixture<float>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallCropResizeDataset(),
+                               combine(framework::dataset::make("IsOutOfBounds", { true, false }),
+                                       framework::dataset::make("DataType", DataType::F32))))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
+}
+TEST_SUITE_END() // F32
+TEST_SUITE_END() // Float
+
+TEST_SUITE(U16)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       NECropResizeFixture<uint16_t>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallCropResizeDataset(),
+                               combine(framework::dataset::make("IsOutOfBounds", { true, false }),
+                                       framework::dataset::make("DataType", DataType::U16))))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
+}
+TEST_SUITE_END() // U16
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       NECropResizeFixture<int16_t>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallCropResizeDataset(),
+                               combine(framework::dataset::make("IsOutOfBounds", { true, false }),
+                                       framework::dataset::make("DataType", DataType::S16))))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
+}
+TEST_SUITE_END() // S16
+
+TEST_SUITE(U32)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       NECropResizeFixture<uint32_t>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallCropResizeDataset(),
+                               combine(framework::dataset::make("IsOutOfBounds", { true, false }),
+                                       framework::dataset::make("DataType", DataType::U32))))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
+}
+TEST_SUITE_END() // U32
+
+TEST_SUITE(S32)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+                       NECropResizeFixture<int32_t>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallCropResizeDataset(),
+                               combine(framework::dataset::make("IsOutOfBounds", { true, false }),
+                                       framework::dataset::make("DataType", DataType::S32))))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_fp32, 0.01);
+}
+TEST_SUITE_END() // S32
+
+TEST_SUITE_END() // CropResize
+TEST_SUITE_END() // NEON
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp
index 4a05535..fc37c02 100644
--- a/tests/validation/NEON/DeconvolutionLayer.cpp
+++ b/tests/validation/NEON/DeconvolutionLayer.cpp
@@ -44,6 +44,8 @@
 namespace
 {
 constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */
+constexpr AbsoluteTolerance<float> tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
+constexpr float                    tolerance_num = 0.07f;  /**< Tolerance number */
 
 const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 3)
                      * framework::dataset::make("PadY", 0, 3) * framework::dataset::make("NumKernels", { 3 });
@@ -213,6 +215,62 @@
 TEST_SUITE_END() // FP32
 TEST_SUITE_END() // Float
 
+template <typename T>
+using NEDeconvolutionLayerQuantizedFixture4x4 = DeconvolutionValidationQuantizedFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 4, 4>;
+
+template <typename T>
+using NEDeconvolutionLayerQuantizedFixture3x3 = DeconvolutionValidationQuantizedFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 3, 3>;
+
+template <typename T>
+using NEDeconvolutionLayerQuantizedFixture1x1 = DeconvolutionValidationQuantizedFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 1, 1>;
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+
+TEST_SUITE(W4x4)
+FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture4x4<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data4x4, framework::dataset::make("DataType",
+                                                                                                                       DataType::QASYMM8)),
+                                                                                                                       data_layouts_dataset),
+                                                                                                                       framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255.f, 0))))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W4x4
+
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data3x3_precommit, framework::dataset::make("DataType",
+                       DataType::QASYMM8)),
+                       data_layouts_dataset),
+                       framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255.f, 0))))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data3x3, framework::dataset::make("DataType",
+                       DataType::QASYMM8)),
+                       data_layouts_dataset),
+                       framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255.f, 0))))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W3x3
+
+TEST_SUITE(W1x1)
+FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture1x1<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data1x1, framework::dataset::make("DataType",
+                                                                                                                       DataType::QASYMM8)),
+                                                                                                                       data_layouts_dataset),
+                                                                                                                       framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255.f, 0))))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W1x1
+
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // Quantized
+
 TEST_SUITE_END() // DeconvolutionLayer
 TEST_SUITE_END() // NEON
 } // namespace validation
diff --git a/tests/validation/NEON/DepthConcatenateLayer.cpp b/tests/validation/NEON/DepthConcatenateLayer.cpp
index 24e7649..844c398 100644
--- a/tests/validation/NEON/DepthConcatenateLayer.cpp
+++ b/tests/validation/NEON/DepthConcatenateLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,7 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
 #include "arm_compute/runtime/Tensor.h"
 #include "arm_compute/runtime/TensorAllocator.h"
 #include "tests/NEON/Accessor.h"
@@ -31,7 +31,7 @@
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
 #include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/DepthConcatenateLayerFixture.h"
+#include "tests/validation/fixtures/ConcatenateLayerFixture.h"
 
 namespace arm_compute
 {
@@ -55,66 +55,48 @@
                                                   TensorInfo(TensorShape(23U, 27U, 4U), 1, DataType::F32),
                                                   TensorInfo(TensorShape(16U, 27U, 6U), 1, DataType::F32)
         })),
-                                                              framework::dataset::make("OutputInfo", {  TensorInfo(TensorShape(23U, 27U, 9U), 1, DataType::F16),
-                                                                                                        TensorInfo(TensorShape(25U, 12U, 9U), 1, DataType::F32),
-                                                                                                        TensorInfo(TensorShape(23U, 27U, 8U), 1, DataType::F32),
-                                                                                                        TensorInfo(TensorShape(16U, 27U, 12U), 1, DataType::F32)
-                                                              })),
-                                                          framework::dataset::make("Expected", { false, false, false, true })),
-               input_info1, input_info2, output_info,expected)
+        framework::dataset::make("OutputInfo", {  TensorInfo(TensorShape(23U, 27U, 9U), 1, DataType::F16),
+                                                  TensorInfo(TensorShape(25U, 12U, 9U), 1, DataType::F32),
+                                                  TensorInfo(TensorShape(23U, 27U, 8U), 1, DataType::F32),
+                                                  TensorInfo(TensorShape(16U, 27U, 12U), 1, DataType::F32)
+        })),
+        framework::dataset::make("Expected", { false, false, false, true })),
+        input_info1, input_info2, output_info,expected)
 {
     std::vector<TensorInfo> inputs_vector_info;
     inputs_vector_info.emplace_back(std::move(input_info1));
     inputs_vector_info.emplace_back(std::move(input_info2));
 
     std::vector<ITensorInfo *> inputs_vector_info_raw;
+    inputs_vector_info_raw.reserve(inputs_vector_info.size());
     for(auto &input : inputs_vector_info)
     {
         inputs_vector_info_raw.emplace_back(&input);
     }
 
-    bool is_valid = bool(NEDepthConcatenateLayer::validate(inputs_vector_info_raw,
-                                                           &output_info.clone()->set_is_resizable(false)));
+    bool is_valid = bool(NEConcatenateLayer::validate(inputs_vector_info_raw, &output_info.clone()->set_is_resizable(false), 2));
     ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
 // *INDENT-ON*
 
-TEST_CASE(Configuration, framework::DatasetMode::ALL)
-{
-    // Create tensors
-    Tensor src1 = create_tensor<Tensor>(TensorShape(32U, 32U, 128U), DataType::F32, 1);
-    Tensor src2 = create_tensor<Tensor>(TensorShape(32U, 32U, 32U), DataType::F32, 1);
-    Tensor dst;
-
-    ARM_COMPUTE_EXPECT(src1.info()->is_resizable(), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(src2.info()->is_resizable(), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
-    // Create and configure function
-    NEDepthConcatenateLayer concat_layer;
-
-    concat_layer.configure({ &src1, &src2 }, &dst);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(TensorShape(32U, 32U, 160U));
-    validate(dst.info()->valid_region(), valid_region);
-}
-
 template <typename T>
-using NEDepthConcatenateLayerFixture = DepthConcatenateLayerValidationFixture<Tensor, ITensor, Accessor, NEDepthConcatenateLayer, T>;
+using NEDepthConcatenateLayerFixture = ConcatenateLayerValidationFixture<Tensor, ITensor, Accessor, NEConcatenateLayer, T>;
 
 TEST_SUITE(Float)
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
-                                                                                                                  DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
+                                                                                                                  framework::dataset::make("DataType",
+                                                                                                                          DataType::F16)),
+                                                                                                                  framework::dataset::make("Axis", 2)))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType",
-                                                                                                                DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType",
+                                                                                                                        DataType::F16)),
+                                                                                                                framework::dataset::make("Axis", 2)))
 {
     // Validate output
     validate(Accessor(_target), _reference);
@@ -123,14 +105,17 @@
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
-                                                                                                                   DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small3DShapes(), datasets::Tiny4DShapes()),
+                                                                                                                   framework::dataset::make("DataType",
+                                                                                                                           DataType::F32)),
+                                                                                                                   framework::dataset::make("Axis", 2)))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType",
-                                                                                                                 DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::F32)),
+                                                                                                                 framework::dataset::make("Axis", 2)))
 {
     // Validate output
     validate(Accessor(_target), _reference);
@@ -140,14 +125,18 @@
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
-                                                                                                                     DataType::QASYMM8)))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small3DShapes(), datasets::Tiny4DShapes()),
+                                                                                                                     framework::dataset::make("DataType",
+                                                                                                                             DataType::QASYMM8)),
+                                                                                                                     framework::dataset::make("Axis", 2)))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType",
-                                                                                                                   DataType::QASYMM8)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(),
+                                                                                                                   framework::dataset::make("DataType",
+                                                                                                                           DataType::QASYMM8)),
+                                                                                                                   framework::dataset::make("Axis", 2)))
 {
     // Validate output
     validate(Accessor(_target), _reference);
diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
index 6c0f590..8eefec3 100644
--- a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
@@ -29,6 +29,7 @@
 #include "tests/NEON/Accessor.h"
 #include "tests/PaddingCalculator.h"
 #include "tests/datasets/DepthwiseConvolutionLayerDataset.h"
+#include "tests/datasets/DilatedDepthwiseConvolutionLayerDataset.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
@@ -53,14 +54,21 @@
 #endif                                                                     // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
 const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 3 });
+
+//Activation Functions
+const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+{
+    ActivationLayerInfo(),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)
+});
 } // namespace
 
 TEST_SUITE(NEON)
-TEST_SUITE(DepthwiseConvLayer)
+TEST_SUITE(DepthwiseConvolutionLayer)
 
 // *INDENT-OFF*
 // clang-format off
-DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
+DATA_TEST_CASE(Validate3x3, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
                framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32),     // Mismatching data type input/weights
                                                        TensorInfo(TensorShape(32U, 18U, 3U), 1, DataType::F32),     // Mismatching input feature maps
                                                        TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32),     // Unsupported weights dimensions
@@ -69,6 +77,8 @@
                                                        TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32),     // Invalid biases size
                                                        TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32),     // Invalid biases dimensions
                                                        TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32),     // Invalid output size
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),     // patch size bigger than input width
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),     // dilation < 1
                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
                                                      }),
                framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16),
@@ -80,6 +90,8 @@
                                                          TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F32),
                                                          TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F32),
                                                          TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F32),
+                                                         TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F32),
+                                                         TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F32),
                                                        })),
                framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(2U), 1, DataType::F32),
@@ -90,6 +102,8 @@
                                                         TensorInfo(TensorShape(2U, 2U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(2U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(2U), 1, DataType::F32),
+                                                        TensorInfo(TensorShape(2U), 1, DataType::F32),
+                                                        TensorInfo(TensorShape(2U), 1, DataType::F32),
                                                       })),
                framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
@@ -100,6 +114,8 @@
                                                         TensorInfo(TensorShape(30U, 16U, 2U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(32U, 18U, 2U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+                                                        TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+                                                        TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
                                                       })),
                framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
                                                       PadStrideInfo(1, 1, 0, 0),
@@ -110,6 +126,8 @@
                                                       PadStrideInfo(1, 1, 0, 0),
                                                       PadStrideInfo(1, 1, 0, 0),
                                                       PadStrideInfo(1, 1, 0, 0),
+                                                      PadStrideInfo(1, 1, 0, 0),
+                                                      PadStrideInfo(1, 1, 0, 0),
                                                      })),
                framework::dataset::make("DepthMultiplier", { 1,
                                                              1,
@@ -120,21 +138,38 @@
                                                              1,
                                                              1,
                                                              1,
+                                                             1,
+                                                             1,
                                                             })),
-               framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, true })),
-               input_info, weights_info, biases_info, output_info, conv_info, depth_multiplier, expected)
+               framework::dataset::make("Dilation", { Size2D(1U, 1U),
+                                                      Size2D(1U, 1U),
+                                                      Size2D(1U, 1U),
+                                                      Size2D(1U, 1U),
+                                                      Size2D(1U, 1U),
+                                                      Size2D(1U, 1U),
+                                                      Size2D(1U, 1U),
+                                                      Size2D(1U, 1U),
+                                                      Size2D(25U, 1U),
+                                                      Size2D(0U, 1U),
+                                                      Size2D(1U, 1U),
+                                                            })),
+               framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false, false, true })),
+               input_info, weights_info, biases_info, output_info, conv_info, depth_multiplier,dilation, expected)
 {
-    bool is_valid = bool(NEDepthwiseConvolutionLayer3x3::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, depth_multiplier));
+    bool is_valid = bool(NEDepthwiseConvolutionLayer3x3::validate(&input_info.clone()->set_is_resizable(false),
+     &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, depth_multiplier, ActivationLayerInfo(), dilation));
     ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
 }
 
-DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
+DATA_TEST_CASE(ValidateGeneric, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
                 framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Mismatching data type input/weights
                                                         TensorInfo(TensorShape(27U, 13U, 3U), 1, DataType::F32),    // Mismatching input feature maps
                                                         TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Mismatching depth multiplier
                                                         TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Invalid biases size
                                                         TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Invalid biases dimensions
                                                         TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Invalid output size
+                                                        TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32),    // patch size bigger than input width
+                                                        TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32),    // dilation < 1
                                                         TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(32U, 13U, 8U), 1, DataType::QASYMM8),
                                                       }),
@@ -145,6 +180,8 @@
                                                           TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
                                                           TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
                                                           TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
+                                                          TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
+                                                          TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
                                                           TensorInfo(TensorShape(3U, 3U, 24U), 1, DataType::QASYMM8),
                                                         })),
                 framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32),
@@ -154,6 +191,8 @@
                                                          TensorInfo(TensorShape(2U, 2U), 1, DataType::F32),
                                                          TensorInfo(TensorShape(2U), 1, DataType::F32),
                                                          TensorInfo(TensorShape(16U), 1, DataType::F32),
+                                                         TensorInfo(TensorShape(16U), 1, DataType::F32),
+                                                         TensorInfo(TensorShape(16U), 1, DataType::F32),
                                                          TensorInfo(TensorShape(24U), 1, DataType::S32),
                                                        })),
                 framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
@@ -163,6 +202,8 @@
                                                          TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
                                                          TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
                                                          TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32),
+                                                         TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32),
+                                                         TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32),
                                                          TensorInfo(TensorShape(32U, 11U, 24U), 1, DataType::QASYMM8),
                                                        })),
                 framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
@@ -172,6 +213,8 @@
                                                        PadStrideInfo(1, 1, 0, 0),
                                                        PadStrideInfo(1, 1, 0, 0),
                                                        PadStrideInfo(1, 1, 0, 0),
+                                                       PadStrideInfo(1, 1, 0, 0),
+                                                       PadStrideInfo(1, 1, 0, 0),
                                                        PadStrideInfo(1, 1, 1, 0),
                                                       })),
                 framework::dataset::make("DepthMultiplier", { 1,
@@ -181,12 +224,25 @@
                                                               1,
                                                               1,
                                                               2,
+                                                              2,
+                                                              2,
                                                               3,
                                                              })),
-                framework::dataset::make("Expected", { false, false, false, false, false, false, true, true })),
-                input_info, weights_info, biases_info, output_info, conv_info, depth_multiplier, expected)
+                framework::dataset::make("Dilation", { Size2D(1U, 1U),
+                                                       Size2D(1U, 1U),
+                                                       Size2D(1U, 1U),
+                                                       Size2D(1U, 1U),
+                                                       Size2D(1U, 1U),
+                                                       Size2D(1U, 1U),
+                                                       Size2D(25U, 1U),
+                                                       Size2D(0U, 1U),
+                                                       Size2D(1U, 1U),
+                                                       Size2D(1U, 1U),
+                                                             })),
+                framework::dataset::make("Expected", { false, false, false, false, false, false,false, false, true, true })),
+                input_info, weights_info, biases_info, output_info, conv_info, depth_multiplier,dilation, expected)
 {
-    bool is_valid = bool(NEDepthwiseConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, depth_multiplier));
+    bool is_valid = bool(NEDepthwiseConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, depth_multiplier, ActivationLayerInfo(), dilation));
     ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
@@ -197,58 +253,108 @@
 TEST_SUITE(Generic)
 template <typename T>
 using NEDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T>;
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
                                                                                                                        depth_multipliers),
                                                                                                                        framework::dataset::make("DataType",
                                                                                                                                DataType::F32)),
-                                                                                                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                       ActivationFunctionsDataset))
 {
     validate(Accessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
                                                                                                                      depth_multipliers),
                                                                                                                      framework::dataset::make("DataType",
                                                                                                                              DataType::F32)),
-                                                                                                                     framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                                     framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                     ActivationFunctionsDataset))
 {
     validate(Accessor(_target), _reference, tolerance_f32);
 }
+
+TEST_SUITE(Dilation)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+                                                                                                                 depth_multipliers),
+                                                                                                                 framework::dataset::make("DataType",
+                                                                                                                         DataType::F32)),
+                                                                                                                 framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                 ActivationFunctionsDataset))
+{
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
+                                                                                                                     depth_multipliers),
+                                                                                                                     framework::dataset::make("DataType",
+                                                                                                                             DataType::F32)),
+                                                                                                                     framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                     ActivationFunctionsDataset))
+{
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END() // Dilation
 TEST_SUITE_END() // Generic
 
 TEST_SUITE(W3x3)
 template <typename T>
 using NEDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer3x3, T>;
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
                                                                                                                     depth_multipliers),
                                                                                                                     framework::dataset::make("DataType",
                                                                                                                             DataType::F32)),
-                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                    ActivationFunctionsDataset))
 {
     validate(Accessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
                                                                                                                         depth_multipliers),
                                                                                                                         framework::dataset::make("DataType",
                                                                                                                                 DataType::F32)),
-                                                                                                                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                        ActivationFunctionsDataset))
 {
     validate(Accessor(_target), _reference, tolerance_f32);
 }
+TEST_SUITE(Dilation)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+                                                                                                                    depth_multipliers),
+                                                                                                                    framework::dataset::make("DataType",
+                                                                                                                            DataType::F32)),
+                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                    ActivationFunctionsDataset))
+{
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
+                                                       depth_multipliers),
+                                               framework::dataset::make("DataType",
+                                                                        DataType::F32)),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
+{
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
+
+TEST_SUITE_END() // Dilation
+
 FIXTURE_DATA_TEST_CASE(RunOptimizedSmall, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
-                                               framework::dataset::make("DepthMultiplier", 1)),
-                                       framework::dataset::make("DataType",
-                                                                DataType::F32)),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
+                                                       framework::dataset::make("DepthMultiplier", 1)),
+                                               framework::dataset::make("DataType",
+                                                                        DataType::F32)),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
 {
     validate(Accessor(_target), _reference, tolerance_f32);
 }
 FIXTURE_DATA_TEST_CASE(RunOptimizedLarge, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
-                                               framework::dataset::make("DepthMultiplier", 1)),
-                                       framework::dataset::make("DataType",
-                                                                DataType::F32)),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
+                                                       framework::dataset::make("DepthMultiplier", 1)),
+                                               framework::dataset::make("DataType",
+                                                                        DataType::F32)),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
 {
     validate(Accessor(_target), _reference, tolerance_f32);
 }
@@ -260,57 +366,110 @@
 TEST_SUITE(Generic)
 template <typename T>
 using NEDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T>;
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
                                                                                                                       depth_multipliers),
                                                                                                                       framework::dataset::make("DataType",
                                                                                                                               DataType::F16)),
-                                                                                                                      framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                                      framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                      ActivationFunctionsDataset))
 {
     validate(Accessor(_target), _reference, tolerance_f16, tolerance_num);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
                                                                                                                     depth_multipliers),
                                                                                                                     framework::dataset::make("DataType",
                                                                                                                             DataType::F16)),
-                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                    ActivationFunctionsDataset))
 {
     validate(Accessor(_target), _reference, tolerance_f16, tolerance_num);
 }
+
+TEST_SUITE(Dilation)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+                                                                                                                        depth_multipliers),
+                                                                                                                        framework::dataset::make("DataType",
+                                                                                                                                DataType::F16)),
+                                                                                                                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                ActivationFunctionsDataset))
+{
+    validate(Accessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
+                                                                                                                    depth_multipliers),
+                                                                                                                    framework::dataset::make("DataType",
+                                                                                                                            DataType::F16)),
+                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                    ActivationFunctionsDataset))
+{
+    validate(Accessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+TEST_SUITE_END() // Dilation
+
 TEST_SUITE_END() // Generic
 TEST_SUITE(W3x3)
 template <typename T>
 using NEDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer3x3, T>;
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
                                                                                                                    depth_multipliers),
                                                                                                                    framework::dataset::make("DataType",
                                                                                                                            DataType::F16)),
-                                                                                                                   framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                                   framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                   ActivationFunctionsDataset))
 {
     validate(Accessor(_target), _reference, tolerance_f16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
                                                                                                                        depth_multipliers),
                                                                                                                        framework::dataset::make("DataType",
                                                                                                                                DataType::F16)),
-                                                                                                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                       ActivationFunctionsDataset))
 {
     validate(Accessor(_target), _reference, tolerance_f16);
 }
+
+TEST_SUITE(Dilation)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+                                                                                                                   depth_multipliers),
+                                                                                                                   framework::dataset::make("DataType",
+                                                                                                                           DataType::F16)),
+                                                                                                                   framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                   ActivationFunctionsDataset))
+{
+    validate(Accessor(_target), _reference, tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
+                                                       depth_multipliers),
+                                               framework::dataset::make("DataType",
+                                                                        DataType::F16)),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
+{
+    validate(Accessor(_target), _reference, tolerance_f16);
+}
+
+TEST_SUITE_END() // Dilation
+
 FIXTURE_DATA_TEST_CASE(RunOptimizedSmall, NEDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
-                                               framework::dataset::make("DepthMultiplier", 1)),
-                                       framework::dataset::make("DataType",
-                                                                DataType::F16)),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
+                                                       framework::dataset::make("DepthMultiplier", 1)),
+                                               framework::dataset::make("DataType",
+                                                                        DataType::F16)),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
 {
     validate(Accessor(_target), _reference, tolerance_f16);
 }
 FIXTURE_DATA_TEST_CASE(RunOptimizedLarge, NEDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
-                                               framework::dataset::make("DepthMultiplier", 1)),
-                                       framework::dataset::make("DataType",
-                                                                DataType::F16)),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
+                                                       framework::dataset::make("DepthMultiplier", 1)),
+                                               framework::dataset::make("DataType",
+                                                                        DataType::F16)),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
 {
     validate(Accessor(_target), _reference, tolerance_f16);
 }
@@ -329,53 +488,104 @@
 TEST_SUITE(QASYMM8)
 TEST_SUITE(Generic)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
-                                                       depth_multipliers),
-                                               framework::dataset::make("DataType", DataType::QASYMM8)),
-                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+                                                               depth_multipliers),
+                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
 {
     validate(Accessor(_target), _reference, tolerance_qasymm8);
 }
+
+TEST_SUITE(Dilation)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+                                                               depth_multipliers),
+                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
+{
+    validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
+                                                               depth_multipliers),
+                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
+{
+    validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() //Dilation
 TEST_SUITE_END() // Generic
 TEST_SUITE(W3x3)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers),
-                                               framework::dataset::make("DataType", DataType::QASYMM8)),
-                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers),
+                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
 {
     validate(Accessor(_target), _reference, tolerance_qasymm8);
 }
 FIXTURE_DATA_TEST_CASE(RunOptimizedSmall, NEDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
-                                                       framework::dataset::make("DepthMultiplier", 1)),
-                                               framework::dataset::make("DataType",
-                                                                        DataType::QASYMM8)),
-                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(),
+                                                               framework::dataset::make("DepthMultiplier", 1)),
+                                                       framework::dataset::make("DataType",
+                                                                                DataType::QASYMM8)),
+                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
 {
     validate(Accessor(_target), _reference, tolerance_qasymm8);
 }
 FIXTURE_DATA_TEST_CASE(RunOptimizedLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
-                                                       framework::dataset::make("DepthMultiplier", 1)),
-                                               framework::dataset::make("DataType",
-                                                                        DataType::QASYMM8)),
-                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(),
+                                                               framework::dataset::make("DepthMultiplier", 1)),
+                                                       framework::dataset::make("DataType",
+                                                                                DataType::QASYMM8)),
+                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
 {
     validate(Accessor(_target), _reference, tolerance_qasymm8);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
-                                                       depth_multipliers),
-                                               framework::dataset::make("DataType", DataType::QASYMM8)),
-                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+                                                               depth_multipliers),
+                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
 {
     validate(Accessor(_target), _reference, tolerance_qasymm8);
 }
+
+TEST_SUITE(Dilation)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers),
+                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
+{
+    validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
+                                                               depth_multipliers),
+                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsDataset))
+{
+    validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // Dilation
 TEST_SUITE_END() // W3x3
 TEST_SUITE_END() // QASYMM8
 TEST_SUITE_END() // Quantized
diff --git a/tests/validation/NEON/DequantizationLayer.cpp b/tests/validation/NEON/DequantizationLayer.cpp
index 48a6b22..0ae20b7 100644
--- a/tests/validation/NEON/DequantizationLayer.cpp
+++ b/tests/validation/NEON/DequantizationLayer.cpp
@@ -42,8 +42,11 @@
 {
 namespace
 {
-/** Tolerance for float operations */
-constexpr AbsoluteTolerance<float> tolerance_f32(0.001f);
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 });
+#else  /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+const auto data_types = framework::dataset::make("DataType", { DataType::F32 });
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 } // namespace
 
 TEST_SUITE(NEON)
@@ -51,96 +54,91 @@
 
 // *INDENT-OFF*
 // clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
-               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),  // Wrong input data type
-                                                       TensorInfo(TensorShape(16U, 5U, 16U), 1, DataType::U8),  // Invalid shape
-                                                       TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::U8),  // Wrong output data type
-                                                       TensorInfo(TensorShape(16U, 16U, 2U, 5U), 1, DataType::U8),  // Missmatching shapes
-                                                       TensorInfo(TensorShape(17U, 16U, 16U, 5U), 1, DataType::U8),  // Shrink window
-                                                       TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::U8),  // Valid
-                                                     }),
-               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(16U, 5U, 16U), 1, DataType::U8),
-                                                       TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::U8),
-                                                       TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(17U, 16U, 16U, 5U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
-                                                     })),
-               framework::dataset::make("MinMax",{ TensorInfo(TensorShape(2U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(2U), 1, DataType::U8),
-                                                       TensorInfo(TensorShape(2U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(2U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(2U), 1, DataType::U8),
-                                                       TensorInfo(TensorShape(2U), 1, DataType::U8),
-                                                     })),
-               framework::dataset::make("Expected", { false, false, false, false, false, true})),
-               input_info, output_info, min_max, expected)
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
+        framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),      // Wrong input data type
+                                                TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8),  // Wrong output data type
+                                                TensorInfo(TensorShape(16U, 16U, 2U, 5U), 1, DataType::QASYMM8),   // Missmatching shapes
+                                                TensorInfo(TensorShape(17U, 16U, 16U, 5U), 1, DataType::QASYMM8),  // Valid
+                                                TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8),  // Valid
+        }),
+        framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
+                                                TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::U8),
+                                                TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
+                                                TensorInfo(TensorShape(17U, 16U, 16U, 5U), 1, DataType::F32),
+                                                TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
+        })),
+        framework::dataset::make("Expected", { false, false, false, true, true})),
+        input_info, output_info, expected)
 {
-    ARM_COMPUTE_EXPECT(bool(NEDequantizationLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), &min_max.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(bool(NEDequantizationLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
 // *INDENT-ON*
 
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::Small3DShapes(), framework::dataset::make("DataType", DataType::U8)), shape, data_type)
+DATA_TEST_CASE(Configuration,
+               framework::DatasetMode::ALL,
+               combine(datasets::SmallShapes(), data_types),
+               shape, data_type)
 {
-    TensorShape shape_min_max = shape;
-    shape_min_max.set(Window::DimX, 2);
-
-    // Remove Y and Z dimensions and keep the batches
-    shape_min_max.remove_dimension(1);
-    shape_min_max.remove_dimension(1);
-
     // Create tensors
-    Tensor src     = create_tensor<Tensor>(shape, data_type);
-    Tensor dst     = create_tensor<Tensor>(shape, DataType::F32);
-    Tensor min_max = create_tensor<Tensor>(shape_min_max, DataType::F32);
+    Tensor src = create_tensor<Tensor>(shape, DataType::QASYMM8, 1, QuantizationInfo(0.5f, -10));
+    Tensor dst = create_tensor<Tensor>(shape, data_type);
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(min_max.info()->is_resizable(), framework::LogLevel::ERRORS);
 
     // Create and configure function
     NEDequantizationLayer dequant_layer;
-    dequant_layer.configure(&src, &dst, &min_max);
+    dequant_layer.configure(&src, &dst);
 
     // Validate valid region
     const ValidRegion valid_region = shape_to_valid_region(shape);
     validate(src.info()->valid_region(), valid_region);
     validate(dst.info()->valid_region(), valid_region);
 
-    // Validate valid region of min_max tensor
-    const ValidRegion valid_region_min_max = shape_to_valid_region(shape_min_max);
-    validate(min_max.info()->valid_region(), valid_region_min_max);
-
     // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 8).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-
-    // Validate padding of min_max tensor
-    const PaddingSize padding_min_max = PaddingCalculator(shape_min_max.x(), 2).required_padding();
-    validate(min_max.info()->padding(), padding_min_max);
+    validate(src.info()->padding(), PaddingSize());
+    validate(dst.info()->padding(), PaddingSize());
 }
 
 template <typename T>
 using NEDequantizationLayerFixture = DequantizationValidationFixture<Tensor, Accessor, NEDequantizationLayer, T>;
 
-TEST_SUITE(Integer)
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDequantizationLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small3DShapes(), datasets::Small4DShapes()),
-                                                                                                                   framework::dataset::make("DataType", DataType::U8)))
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDequantizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(),
+                                                                                                                        framework::dataset::make("DataType", DataType::F16)),
+                                                                                                                framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.1f, 128.0f) })))
 {
     // Validate output
-    validate(Accessor(_target), _reference, tolerance_f32);
+    validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDequantizationLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(concat(datasets::Large3DShapes(), datasets::Large4DShapes()),
-                                                                                                                 framework::dataset::make("DataType", DataType::U8)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDequantizationLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(),
+                                                                                                                      framework::dataset::make("DataType", DataType::F16)),
+                                                                                                              framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.1f, 128.0f) })))
 {
     // Validate output
-    validate(Accessor(_target), _reference, tolerance_f32);
+    validate(Accessor(_target), _reference);
 }
-TEST_SUITE_END() // U8
-TEST_SUITE_END() // Integer
+TEST_SUITE_END() // FP16
+#endif           /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDequantizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(),
+                                                                                                                 framework::dataset::make("DataType", DataType::F32)),
+                                                                                                                 framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.1f, 128.0f) })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDequantizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(),
+                                                                                                                       framework::dataset::make("DataType", DataType::F32)),
+                                                                                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.1f, 128.0f) })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // FP32
 
 TEST_SUITE_END() // DequantizationLayer
 TEST_SUITE_END() // NEON
diff --git a/tests/validation/NEON/FFT.cpp b/tests/validation/NEON/FFT.cpp
new file mode 100644
index 0000000..d762630
--- /dev/null
+++ b/tests/validation/NEON/FFT.cpp
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEFFT1D.h"
+#include "arm_compute/runtime/NEON/functions/NEFFT2D.h"
+#include "arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/datasets/SmallConvolutionLayerDataset.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/FFTFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+const auto data_types = framework::dataset::make("DataType", { DataType::F32 });
+const auto shapes_1d  = framework::dataset::make("TensorShape", { TensorShape(2U, 2U, 3U), TensorShape(3U, 2U, 3U),
+                                                                  TensorShape(4U, 2U, 3U), TensorShape(5U, 2U, 3U),
+                                                                  TensorShape(7U, 2U, 3U), TensorShape(8U, 2U, 3U),
+                                                                  TensorShape(9U, 2U, 3U), TensorShape(25U, 2U, 3U),
+                                                                  TensorShape(49U, 2U, 3U), TensorShape(64U, 2U, 3U),
+                                                                  TensorShape(16U, 2U, 3U), TensorShape(32U, 2U, 3U),
+                                                                  TensorShape(96U, 2U, 2U)
+                                                                });
+
+const auto shapes_2d = framework::dataset::make("TensorShape", { TensorShape(2U, 2U, 3U), TensorShape(3U, 6U, 3U),
+                                                                 TensorShape(4U, 5U, 3U), TensorShape(5U, 7U, 3U),
+                                                                 TensorShape(7U, 25U, 3U), TensorShape(8U, 2U, 3U),
+                                                                 TensorShape(9U, 16U, 3U), TensorShape(25U, 32U, 3U),
+                                                                 TensorShape(192U, 128U, 2U)
+                                                               });
+
+const auto ActivationFunctionsSmallDataset = framework::dataset::make("ActivationInfo",
+{
+    ActivationLayerInfo(),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f)
+});
+
+RelativeTolerance<float> tolerance_f32(0.1f);   /**< Relative tolerance value for FP32 */
+constexpr float          tolerance_num = 0.07f; /**< Tolerance number */
+
+} // namespace
+TEST_SUITE(NEON)
+TEST_SUITE(FFT1D)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(shapes_1d, data_types),
+               shape, data_type)
+{
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, data_type, 2);
+    Tensor dst = create_tensor<Tensor>(shape, data_type, 2);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    NEFFT1D fft1d;
+    fft1d.configure(&src, &dst, FFT1DInfo());
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(src.info()->valid_region(), valid_region);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    validate(src.info()->padding(), PaddingSize());
+    validate(dst.info()->padding(), PaddingSize());
+}
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+        framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 2, DataType::F32), // Mismatching data types
+                                                TensorInfo(TensorShape(32U, 13U, 2U), 2, DataType::F32), // Mismatching shapes
+                                                TensorInfo(TensorShape(32U, 13U, 2U), 3, DataType::F32), // Invalid channels
+                                                TensorInfo(TensorShape(32U, 13U, 2U), 2, DataType::F32), // Unsupported axis
+                                                TensorInfo(TensorShape(11U, 13U, 2U), 2, DataType::F32), // Undecomposable FFT
+                                                TensorInfo(TensorShape(25U, 13U, 2U), 2, DataType::F32),
+        }),
+        framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 2, DataType::F16),
+                                                TensorInfo(TensorShape(16U, 13U, 2U), 2, DataType::F32),
+                                                TensorInfo(TensorShape(32U, 13U, 2U), 2, DataType::F32),
+                                                TensorInfo(TensorShape(32U, 13U, 2U), 2, DataType::F32),
+                                                TensorInfo(TensorShape(11U, 13U, 2U), 2, DataType::F32),
+                                                TensorInfo(TensorShape(25U, 13U, 2U), 2, DataType::F32),
+        })),
+        framework::dataset::make("Axis", { 0, 0, 0, 2, 0, 0 })),
+        framework::dataset::make("Expected", { false, false, false, false, false, true })),
+        input_info, output_info, axis, expected)
+{
+    FFT1DInfo desc;
+    desc.axis = axis;
+    const Status s = NEFFT1D::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), desc);
+    ARM_COMPUTE_EXPECT(bool(s) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+template <typename T>
+using NEFFT1DFixture = FFTValidationFixture<Tensor, Accessor, NEFFT1D, FFT1DInfo, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFFT1DFixture<float>, framework::DatasetMode::ALL, combine(shapes_1d, framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_f32, tolerance_num);
+}
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
+TEST_SUITE_END() // FFT1D
+
+TEST_SUITE(FFT2D)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(shapes_2d, data_types),
+               shape, data_type)
+{
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, data_type, 2);
+    Tensor dst = create_tensor<Tensor>(shape, data_type, 2);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    NEFFT2D fft2d;
+    fft2d.configure(&src, &dst, FFT2DInfo());
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(src.info()->valid_region(), valid_region);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    validate(src.info()->padding(), PaddingSize());
+    validate(dst.info()->padding(), PaddingSize());
+}
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
+        framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 25U, 2U), 2, DataType::F32), // Mismatching data types
+                                                TensorInfo(TensorShape(32U, 25U, 2U), 2, DataType::F32), // Mismatching shapes
+                                                TensorInfo(TensorShape(32U, 25U, 2U), 3, DataType::F32), // Invalid channels
+                                                TensorInfo(TensorShape(32U, 13U, 2U), 2, DataType::F32), // Undecomposable FFT
+                                                TensorInfo(TensorShape(32U, 25U, 2U), 2, DataType::F32),
+        }),
+        framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 25U, 2U), 2, DataType::F16),
+                                                TensorInfo(TensorShape(16U, 25U, 2U), 2, DataType::F32),
+                                                TensorInfo(TensorShape(32U, 25U, 2U), 1, DataType::F32),
+                                                TensorInfo(TensorShape(32U, 13U, 2U), 2, DataType::F32),
+                                                TensorInfo(TensorShape(32U, 25U, 2U), 2, DataType::F32),
+        })),
+        framework::dataset::make("Expected", { false, false, false, false, true })),
+               input_info, output_info, expected)
+{
+    const Status s = NEFFT2D::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), FFT2DInfo());
+    ARM_COMPUTE_EXPECT(bool(s) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+template <typename T>
+using NEFFT2DFixture = FFTValidationFixture<Tensor, Accessor, NEFFT2D, FFT2DInfo, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFFT2DFixture<float>, framework::DatasetMode::ALL, combine(shapes_2d, framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_f32, tolerance_num);
+}
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
+TEST_SUITE_END() // FFT2D
+
+TEST_SUITE(FFTConvolutionLayer)
+
+template <typename T>
+using NEFFTConvolutionLayerFixture = FFTConvolutionValidationFixture<Tensor, Accessor, NEFFTConvolutionLayer, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFFTConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFFTConvolutionLayerDataset(),
+                                                                                                                 framework::dataset::make("DataType", DataType::F32)),
+                                                                                                                 framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                 ActivationFunctionsSmallDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_f32, tolerance_num);
+}
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
+TEST_SUITE_END() // FFTConvolutionLayer
+
+TEST_SUITE_END() // NEON
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/GEMMLowp.cpp b/tests/validation/NEON/GEMMLowp.cpp
index 57067f1..f0460b4 100644
--- a/tests/validation/NEON/GEMMLowp.cpp
+++ b/tests/validation/NEON/GEMMLowp.cpp
@@ -30,6 +30,7 @@
 #include "tests/NEON/Accessor.h"
 #include "tests/NEON/Helper.h"
 #include "tests/PaddingCalculator.h"
+#include "tests/datasets/GEMMLowpFusedOffsetOutputDataset.h"
 #include "tests/datasets/LargeGEMMLowpDataset.h"
 #include "tests/datasets/ShapeDatasets.h"
 #include "tests/datasets/SmallGEMMLowpDataset.h"
@@ -144,6 +145,20 @@
     validate(Accessor(_target), _reference);
 }
 
+using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>;
+TEST_SUITE(FusedOffsetOutput)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpFusedOffsetOutputDataset())
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpFusedOffsetOutputDataset())
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // FusedOffsetOutput
 TEST_SUITE_END() // MatrixMultiplyCore
 
 TEST_SUITE(OutputStage)
diff --git a/tests/validation/NEON/HeightConcatenateLayer.cpp b/tests/validation/NEON/HeightConcatenateLayer.cpp
new file mode 100644
index 0000000..075dfa3
--- /dev/null
+++ b/tests/validation/NEON/HeightConcatenateLayer.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/ConcatenateLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(NEON)
+TEST_SUITE(HeightConcatenateLayer)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+        framework::dataset::make("InputInfo1", {  TensorInfo(TensorShape(23U, 15U, 5U), 1, DataType::F32), // Mismatching data type input/output
+                                                  TensorInfo(TensorShape(22U, 27U, 5U), 1, DataType::F32), // Mismatching y dimension
+                                                  TensorInfo(TensorShape(11U, 25U, 5U), 1, DataType::F32), // Mismatching total height
+                                                  TensorInfo(TensorShape(16U, 25U, 5U), 1, DataType::F32),
+                                                  TensorInfo(TensorShape(35U, 21U, 5U), 1, DataType::F32)
+
+        }),
+        framework::dataset::make("InputInfo2", {  TensorInfo(TensorShape(23U, 15U, 4U), 1, DataType::F32),
+                                                  TensorInfo(TensorShape(22U, 127U, 5U), 1, DataType::F32),
+                                                  TensorInfo(TensorShape(11U, 26U, 5U), 1, DataType::F32),
+                                                  TensorInfo(TensorShape(16U, 25U, 5U), 1, DataType::F32),
+                                                  TensorInfo(TensorShape(35U, 10U, 5U), 1, DataType::F32)
+        })),
+        framework::dataset::make("OutputInfo", {  TensorInfo(TensorShape(23U, 30U, 5U), 1, DataType::F16),
+                                                  TensorInfo(TensorShape(22U, 12U, 5U), 1, DataType::F32),
+                                                  TensorInfo(TensorShape(11U, 7U, 5U), 1, DataType::F32),
+                                                  TensorInfo(TensorShape(16U, 50U, 5U), 1, DataType::F32),
+                                                  TensorInfo(TensorShape(35U, 31U, 5U), 1, DataType::F32)
+        })),
+        framework::dataset::make("Expected", { false, false, false, true, true })),
+        input_info1, input_info2, output_info,expected)
+{
+    std::vector<TensorInfo> inputs_vector_info;
+    inputs_vector_info.emplace_back(std::move(input_info1));
+    inputs_vector_info.emplace_back(std::move(input_info2));
+
+    std::vector<ITensorInfo *> inputs_vector_info_raw;
+    inputs_vector_info_raw.reserve(inputs_vector_info.size());
+    for(auto &input : inputs_vector_info)
+    {
+        inputs_vector_info_raw.emplace_back(&input);
+    }
+
+    bool is_valid = bool(NEConcatenateLayer::validate(inputs_vector_info_raw, &output_info.clone()->set_is_resizable(true), 1));
+    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+template <typename T>
+using NEHeightConcatenateLayerFixture = ConcatenateLayerValidationFixture<Tensor, ITensor, Accessor, NEConcatenateLayer, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEHeightConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
+                                                                                                                    framework::dataset::make("DataType",
+                                                                                                                            DataType::F32)),
+                                                                                                                    framework::dataset::make("Axis", 1)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEHeightConcatenateLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType",
+                                                                                                                  DataType::F32)),
+                                                                                                                  framework::dataset::make("Axis", 1)))
+
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEHeightConcatenateLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
+                                                                                                                      framework::dataset::make("DataType",
+                                                                                                                              DataType::QASYMM8)),
+                                                                                                                      framework::dataset::make("Axis", 1)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEHeightConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(),
+                                                                                                                    framework::dataset::make("DataType",
+                                                                                                                            DataType::QASYMM8)),
+                                                                                                                    framework::dataset::make("Axis", 1)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // Quantized
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/LSTMLayer.cpp b/tests/validation/NEON/LSTMLayer.cpp
index 5dfd32b..b27dfae 100644
--- a/tests/validation/NEON/LSTMLayer.cpp
+++ b/tests/validation/NEON/LSTMLayer.cpp
@@ -109,7 +109,7 @@
                                                         TensorInfo(TensorShape(16U, 2U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(16U, 2U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(16U, 2U), 1, DataType::F32),
-                                                        TensorInfo(TensorShape(11U, 13U), 1, DataType::F32),
+                                                        TensorInfo(TensorShape(11U, 2U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(16U, 2U), 1, DataType::F32),
                })),
                framework::dataset::make("ScratchInfo", { TensorInfo(TensorShape(64U, 2U), 1, DataType::F32),
diff --git a/tests/validation/NEON/NonLinearFilter.cpp b/tests/validation/NEON/NonLinearFilter.cpp
index b3d4c93..08ad1e2 100644
--- a/tests/validation/NEON/NonLinearFilter.cpp
+++ b/tests/validation/NEON/NonLinearFilter.cpp
@@ -52,8 +52,8 @@
     const uint8_t                          constant_border_value = distribution_u8(generator);
 
     // Create the mask
-    uint8_t mask[mask_size * mask_size];
-    fill_mask_from_pattern(mask, mask_size, mask_size, pattern);
+    std::vector<uint8_t> mask(mask_size * mask_size);
+    fill_mask_from_pattern(mask.data(), mask_size, mask_size, pattern);
     const auto half_mask_size = static_cast<int>(mask_size / 2);
 
     // Create tensors
@@ -65,7 +65,7 @@
 
     // Create and configure function
     NENonLinearFilter filter;
-    filter.configure(&src, &dst, function, mask_size, pattern, mask, border_mode, constant_border_value);
+    filter.configure(&src, &dst, function, mask_size, pattern, mask.data(), border_mode, constant_border_value);
 
     // Validate valid region
     const ValidRegion dst_valid_region = shape_to_valid_region(shape, border_mode == BorderMode::UNDEFINED, BorderSize(half_mask_size));
diff --git a/tests/validation/NEON/PadLayer.cpp b/tests/validation/NEON/PadLayer.cpp
index 90d3ae9..5049347 100644
--- a/tests/validation/NEON/PadLayer.cpp
+++ b/tests/validation/NEON/PadLayer.cpp
@@ -42,12 +42,14 @@
 {
 namespace
 {
-const auto PaddingSizesDataset = framework::dataset::make("PaddingSize", { PaddingList{ { 0, 0 } },
+const auto PaddingSizesDataset = framework::dataset::make("PaddingSize",
+{
+    PaddingList{ { 0, 0 } },
     PaddingList{ { 1, 1 } },
     PaddingList{ { 1, 1 }, { 2, 2 } },
-    PaddingList{ { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 } },
-    PaddingList{ { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 2 } },
-    PaddingList{ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 1, 1 } }
+    PaddingList{ { 1, 1 }, { 1, 1 }, { 1, 1 } },
+    PaddingList{ { 0, 0 }, { 1, 0 }, { 0, 1 } },
+    PaddingList{ { 0, 1 }, { 1, 0 }, { 0, 1 } },
 });
 } // namespace
 
@@ -57,33 +59,62 @@
 // *INDENT-OFF*
 // clang-format off
 
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
-        framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),     // Mismatching data type input/output
-                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
-                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
-                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
-                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
-                                                TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32)
-        }),
-        framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16),
-                                                TensorInfo(TensorShape(28U, 11U, 2U), 1, DataType::F32),
-                                                TensorInfo(TensorShape(29U, 17U, 2U), 1, DataType::F32),
-                                                TensorInfo(TensorShape(29U, 15U, 4U, 3U), 1, DataType::F32),
-                                                TensorInfo(TensorShape(27U, 14U, 3U, 4U), 1, DataType::F32),
-                                                TensorInfo(TensorShape(32U, 13U, 2U, 3U), 1, DataType::F32)
-        })),
-        framework::dataset::make("PaddingSize", { PaddingList{{0, 0}},
-                                                  PaddingList{{1, 1}},
-                                                  PaddingList{{1, 1}, {2, 2}},
-                                                  PaddingList{{1,1}, {1,1}, {1,1}, {1,1}},
-                                                  PaddingList{{0,0}, {1,0}, {0,1}, {1,2}},
-                                                  PaddingList{{0,0}, {0,0}, {0,0}, {1,1}}
-        })),
-        framework::dataset::make("Expected", { false, false, true, true, true, true })),
-        input_info, output_info, padding, expected)
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),     // Mismatching data type input/output
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),     // Mismatching data type input/output
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32)
+                                                     }),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16),
+                                                       TensorInfo(TensorShape(28U, 11U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(29U, 17U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(29U, 15U, 4U, 3U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(27U, 14U, 3U, 4U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U, 3U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16),
+                                                       TensorInfo(TensorShape(28U, 11U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(29U, 17U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(29U, 15U, 4U, 3U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(27U, 14U, 3U, 4U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U, 3U), 1, DataType::F32)
+                                                     })),
+               framework::dataset::make("PaddingSize", { PaddingList{{0, 0}},
+                                                         PaddingList{{1, 1}},
+                                                         PaddingList{{1, 1}, {2, 2}},
+                                                         PaddingList{{1,1}, {1,1}, {1,1}, {1,1}},
+                                                         PaddingList{{0,0}, {1,0}, {0,1}, {1,2}},
+                                                         PaddingList{{0,0}, {0,0}, {0,0}, {1,1}},
+                                                         PaddingList{{0, 0}},
+                                                         PaddingList{{1, 1}},
+                                                         PaddingList{{1, 1}, {2, 2}},
+                                                         PaddingList{{1,1}, {1,1}, {1,1}, {1,1}},
+                                                         PaddingList{{0,0}, {1,0}, {0,1}, {1,2}},
+                                                         PaddingList{{0,0}, {0,0}, {0,0}, {1,1}}
+                                                         })),
+               framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT,
+                                                         PaddingMode::CONSTANT,
+                                                         PaddingMode::CONSTANT,
+                                                         PaddingMode::CONSTANT,
+                                                         PaddingMode::CONSTANT,
+                                                         PaddingMode::CONSTANT,
+                                                         PaddingMode::REFLECT,
+                                                         PaddingMode::REFLECT,
+                                                         PaddingMode::REFLECT,
+                                                         PaddingMode::REFLECT,
+                                                         PaddingMode::REFLECT,
+                                                         PaddingMode::SYMMETRIC })),
+               framework::dataset::make("Expected", { false, false, true, true, true, true, false, false, true, false, false, true })),
+               input_info, output_info, padding, mode, expected)
 {
-    Status s = NEPadLayer::validate(&input_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true), padding);
-    ARM_COMPUTE_EXPECT(bool(s) == expected, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(bool(NEPadLayer::validate(&input_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true), padding, PixelValue(), mode)) == expected, framework::LogLevel::ERRORS);
 }
 
 // clang-format on
@@ -96,17 +127,17 @@
 
 TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEPaddingFixture<float>, framework::DatasetMode::ALL,
-                       combine(
-                           combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::F32 })),
-                           PaddingSizesDataset))
+                       combine(combine(combine(datasets::Small3DShapes(), framework::dataset::make("DataType", { DataType::F32 })),
+                                       PaddingSizesDataset),
+                               framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT })))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, NEPaddingFixture<float>, framework::DatasetMode::NIGHTLY,
-                       combine(
-                           combine(datasets::LargeShapes(), framework::dataset::make("DataType", { DataType::F32 })),
-                           PaddingSizesDataset))
+                       combine(combine(combine(datasets::Large3DShapes(), framework::dataset::make("DataType", { DataType::F32 })),
+                                       PaddingSizesDataset),
+                               framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT, PaddingMode::SYMMETRIC })))
 {
     // Validate output
     validate(Accessor(_target), _reference);
@@ -116,17 +147,17 @@
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEPaddingFixture<half>, framework::DatasetMode::ALL,
-                       combine(
-                           combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::F16 })),
-                           PaddingSizesDataset))
+                       combine(combine(combine(datasets::Small3DShapes(), framework::dataset::make("DataType", { DataType::F16 })),
+                                       PaddingSizesDataset),
+                               framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT })))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, NEPaddingFixture<half>, framework::DatasetMode::NIGHTLY,
-                       combine(
-                           combine(datasets::LargeShapes(), framework::dataset::make("DataType", { DataType::F16 })),
-                           PaddingSizesDataset))
+                       combine(combine(combine(datasets::Large3DShapes(), framework::dataset::make("DataType", { DataType::F16 })),
+                                       PaddingSizesDataset),
+                               framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT, PaddingMode::SYMMETRIC })))
 {
     // Validate output
     validate(Accessor(_target), _reference);
@@ -135,41 +166,20 @@
 #endif           /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 TEST_SUITE_END() // Float
 
-TEST_SUITE(Integer)
-TEST_SUITE(S8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEPaddingFixture<int8_t>, framework::DatasetMode::ALL,
-                       combine(
-                           combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::S8 })),
-                           PaddingSizesDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEPaddingFixture<int8_t>, framework::DatasetMode::NIGHTLY,
-                       combine(
-                           combine(datasets::LargeShapes(), framework::dataset::make("DataType", { DataType::S8 })),
-                           PaddingSizesDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-TEST_SUITE_END() // S8
-TEST_SUITE_END() // Integer
-
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEPaddingFixture<uint8_t>, framework::DatasetMode::ALL,
-                       combine(
-                           combine(datasets::SmallShapes(), framework::dataset::make("DataType", { DataType::QASYMM8 })),
-                           PaddingSizesDataset))
+                       combine(combine(combine(datasets::Small3DShapes(), framework::dataset::make("DataType", { DataType::QASYMM8 })),
+                                       PaddingSizesDataset),
+                               framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT })))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, NEPaddingFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
-                       combine(
-                           combine(datasets::LargeShapes(), framework::dataset::make("DataType", { DataType::QASYMM8 })),
-                           PaddingSizesDataset))
+                       combine(combine(combine(datasets::Large3DShapes(), framework::dataset::make("DataType", { DataType::QASYMM8 })),
+                                       PaddingSizesDataset),
+                               framework::dataset::make("PaddingMode", { PaddingMode::CONSTANT, PaddingMode::REFLECT, PaddingMode::SYMMETRIC })))
 {
     // Validate output
     validate(Accessor(_target), _reference);
diff --git a/tests/validation/NEON/PixelWiseMultiplication.cpp b/tests/validation/NEON/PixelWiseMultiplication.cpp
index 77da473..0cc97a2 100644
--- a/tests/validation/NEON/PixelWiseMultiplication.cpp
+++ b/tests/validation/NEON/PixelWiseMultiplication.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,6 +43,7 @@
 const float scale_other = 1.f / 32768.f;
 
 #define DEFAULT_VALIDATE validate(Accessor(_target), _reference);
+#define QASYMM8_VALIDATE validate(Accessor(_target), _reference, AbsoluteTolerance<uint8_t>(1), 0.f);
 #define VALIDATE(TYPE, TOLERANCE) validate(Accessor(_target), _reference, AbsoluteTolerance<TYPE>(TOLERANCE), 0.f);
 #define WRAP_VALIDATE(TYPE, TOLERANCE) validate_wrap(Accessor(_target), _reference, AbsoluteTolerance<TYPE>(TOLERANCE), 0.f);
 
@@ -74,6 +75,24 @@
     {                                                                                                                     \
         VALIDATE                                                                                                          \
     }
+
+#define PIXEL_WISE_MULTIPLICATION_FIXTURE_QASYMM8_DATA_TEST_CASE(TEST_NAME, FIXTURE, MODE, SHAPES, DT1, DT2, SCALE, RP, VALIDATE) \
+    FIXTURE_DATA_TEST_CASE(TEST_NAME, NEPixelWiseMultiplication##FIXTURE, framework::DatasetMode::MODE,                           \
+                           combine(combine(combine(combine(combine(combine(combine(combine(                                       \
+                           datasets::SHAPES,                                                                                      \
+                           framework::dataset::make("DataType1", DataType::DT1)),                                                 \
+                           framework::dataset::make("DataType2", DataType::DT2)),                                                 \
+                           framework::dataset::make("Scale", std::move(SCALE))),                                                  \
+                           framework::dataset::make("ConvertPolicy", ConvertPolicy::SATURATE)),                                                                          \
+                           framework::dataset::make("RoundingPolicy", RoundingPolicy::RP)),                                       \
+                           framework::dataset::make("QuantizationInfoIn1", QuantizationInfo(1.0 , 0))),                           \
+                           framework::dataset::make("QuantizationInfoIn2", QuantizationInfo(1.0 , 0))),                           \
+                           framework::dataset::make("QuantizationInfoOut", QuantizationInfo(100.0, 10))))                         \
+    {                                                                                                                             \
+        VALIDATE                                                                                                                  \
+    }
+
+
 // *INDENT-ON*
 // clang-format on
 
@@ -105,6 +124,7 @@
 }
 } // namespace
 
+using NEPixelWiseMultiplicationToQASYMM8Fixture = PixelWiseMultiplicationQuatizedValidationFixture<Tensor, Accessor, NEPixelWiseMultiplication, uint8_t, uint8_t>;
 template <typename T>
 using NEPixelWiseMultiplicationToU8Fixture = PixelWiseMultiplicationValidationFixture<Tensor, Accessor, NEPixelWiseMultiplication, T, uint8_t>;
 template <typename T>
@@ -128,7 +148,8 @@
                                                         TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid scale
                                                         TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid data type combination
                                                         TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
-                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),  // Mismatching data type
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),     // Mismatching data type
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Mismatching data type
                                                       }),
                framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
@@ -137,6 +158,7 @@
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
                                                        TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                      })),
                framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
@@ -145,9 +167,10 @@
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                      })),
                framework::dataset::make("Scale",{  scale_unity, scale_unity, scale_unity, -1.f, scale_unity, scale_unity, scale_unity})),
-               framework::dataset::make("Expected", { true, true, false, false, false, false, false })),
+               framework::dataset::make("Expected", { true, true, false, false, false, false, false, false })),
                input1_info, input2_info, output_info, scale, expected)
 {
     bool has_error = bool(NEPixelWiseMultiplication::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), scale, ConvertPolicy::WRAP, RoundingPolicy::TO_ZERO));
@@ -156,6 +179,25 @@
 // clang-format on
 // *INDENT-ON*
 
+TEST_SUITE(QASYMM8toQASYMM8)
+
+TEST_SUITE(Scale255)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_QASYMM8_DATA_TEST_CASE(RunSmall, ToQASYMM8Fixture, PRECOMMIT, SmallShapes(), QASYMM8, QASYMM8, scale_255, TO_NEAREST_UP, WRAP_VALIDATE(uint8_t, 1))
+PIXEL_WISE_MULTIPLICATION_FIXTURE_QASYMM8_DATA_TEST_CASE(RunLarge, ToQASYMM8Fixture, NIGHTLY, LargeShapes(), QASYMM8, QASYMM8, scale_255, TO_NEAREST_UP, WRAP_VALIDATE(uint8_t, 1))
+TEST_SUITE_END() // Scale255
+
+TEST_SUITE(ScaleUnity)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_QASYMM8_DATA_TEST_CASE(RunSmall, ToQASYMM8Fixture, PRECOMMIT, SmallShapes(), QASYMM8, QASYMM8, scale_unity, TO_ZERO, QASYMM8_VALIDATE)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_QASYMM8_DATA_TEST_CASE(RunLarge, ToQASYMM8Fixture, NIGHTLY, LargeShapes(), QASYMM8, QASYMM8, scale_unity, TO_ZERO, QASYMM8_VALIDATE)
+TEST_SUITE_END() // ScaleUnity
+
+TEST_SUITE(ScaleOther)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_QASYMM8_DATA_TEST_CASE(RunSmall, ToQASYMM8Fixture, PRECOMMIT, SmallShapes(), QASYMM8, QASYMM8, scale_other, TO_ZERO, QASYMM8_VALIDATE)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_QASYMM8_DATA_TEST_CASE(RunLarge, ToQASYMM8Fixture, NIGHTLY, LargeShapes(), QASYMM8, QASYMM8, scale_other, TO_ZERO, QASYMM8_VALIDATE)
+TEST_SUITE_END() // ScaleOther
+
+TEST_SUITE_END() // QASYMM8toQASYMM8
+
 TEST_SUITE(U8toU8)
 
 TEST_SUITE(Scale255)
diff --git a/tests/validation/NEON/PoolingLayer.cpp b/tests/validation/NEON/PoolingLayer.cpp
index 9a15775..129f53b 100644
--- a/tests/validation/NEON/PoolingLayer.cpp
+++ b/tests/validation/NEON/PoolingLayer.cpp
@@ -67,6 +67,8 @@
 constexpr AbsoluteTolerance<float> tolerance_f16(0.01f);   /**< Tolerance value for comparing reference's output against implementation's output for float types */
 #endif                                                     /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for 8-bit asymmetric type */
+const auto                           pool_data_layout_dataset = framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC });
+
 } // namespace
 
 TEST_SUITE(NEON)
@@ -124,7 +126,7 @@
 FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFPSmall,
                                                                                                                   framework::dataset::make("DataType",
                                                                                                                           DataType::F32))),
-                                                                                                          framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                          pool_data_layout_dataset))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_f32);
@@ -132,7 +134,7 @@
 FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetFP,
                                                                                                                 framework::dataset::make("DataType",
                                                                                                                         DataType::F32))),
-                                                                                                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                        pool_data_layout_dataset))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_f32);
@@ -143,14 +145,14 @@
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFPSmall,
                                                                                                                  framework::dataset::make("DataType", DataType::F16))),
-                                                                                                         framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                         pool_data_layout_dataset))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_f16);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetFP,
                                                                                                                framework::dataset::make("DataType", DataType::F16))),
-                                                                                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                       pool_data_layout_dataset))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_f16);
@@ -165,20 +167,16 @@
 using NEPoolingLayerQuantizedFixture = PoolingLayerValidationQuantizedFixture<Tensor, Accessor, NEPoolingLayer, T>;
 
 TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQASYMM8Small,
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQASYMM8Small,
                                                                                                                      framework::dataset::make("DataType", DataType::QASYMM8))),
-                                                                                                                     framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127),
-                                                                                                                             QuantizationInfo(7.f / 255, 123)
-                                                                                                                                                                  })),
-                                                                                                                     framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                                     pool_data_layout_dataset))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetQASYMM8,
+FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetQASYMM8,
                                                                                                                    framework::dataset::make("DataType", DataType::QASYMM8))),
-                                                                                                                   framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255, 0) })),
-                                                                                                                   framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                                                                                   pool_data_layout_dataset))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_qasymm8);
diff --git a/tests/validation/NEON/QuantizationLayer.cpp b/tests/validation/NEON/QuantizationLayer.cpp
index 6526539..0b503c0 100644
--- a/tests/validation/NEON/QuantizationLayer.cpp
+++ b/tests/validation/NEON/QuantizationLayer.cpp
@@ -55,21 +55,17 @@
 // *INDENT-OFF*
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
-               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::U8),  // Wrong input data type
-                                                       TensorInfo(TensorShape(16U, 5U, 16U), 1, DataType::U8),  // Invalid shape
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8),  // Wrong input data type
                                                        TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),  // Wrong output data type
-                                                       TensorInfo(TensorShape(16U, 16U, 2U, 5U), 1, DataType::U8),  // Missmatching shapes
-                                                       TensorInfo(TensorShape(17U, 16U, 16U, 5U), 1, DataType::U8),  // Shrink window
+                                                       TensorInfo(TensorShape(16U, 16U, 2U, 5U), 1, DataType::F32),  // Missmatching shapes
                                                        TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),  // Valid
                                                      }),
                framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(16U, 5U, 16U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::U16),
-                                                       TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(17U, 16U, 16U, 5U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8),
+                                                       TensorInfo(TensorShape(16U, 16U, 16U, 5U), 1, DataType::QASYMM8),
                                                      })),
-               framework::dataset::make("Expected", { false, false, false, false, false, true})),
+               framework::dataset::make("Expected", { false, false, false, true})),
                input_info, output_info, expected)
 {
     ARM_COMPUTE_EXPECT(bool(NEQuantizationLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
@@ -81,7 +77,7 @@
 {
     // Create tensors
     Tensor src = create_tensor<Tensor>(shape, data_type);
-    Tensor dst = create_tensor<Tensor>(shape, DataType::U8);
+    Tensor dst = create_tensor<Tensor>(shape, DataType::QASYMM8);
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -96,9 +92,8 @@
     validate(dst.info()->valid_region(), valid_region);
 
     // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 8).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
+    validate(src.info()->padding(), PaddingSize());
+    validate(dst.info()->padding(), PaddingSize());
 }
 
 template <typename T>
@@ -106,20 +101,42 @@
 
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEQuantizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small3DShapes(), datasets::Small4DShapes()),
-                                                                                                               framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEQuantizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small3DShapes(), datasets::Small4DShapes()),
+                                                                                                                       framework::dataset::make("DataType", DataType::F32)),
+                                                                                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_u8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEQuantizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(concat(datasets::Large3DShapes(), datasets::Large4DShapes()),
-                                                                                                             framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEQuantizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(concat(datasets::Large3DShapes(), datasets::Large4DShapes()),
+                                                                                                                     framework::dataset::make("DataType", DataType::F32)),
+                                                                                                             framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_u8);
 }
 TEST_SUITE_END() // FP32
 TEST_SUITE_END() // Float
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+TEST_SUITE(Half)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEQuantizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small3DShapes(), datasets::Small4DShapes()),
+                                                                                                                      framework::dataset::make("DataType", DataType::F16)),
+                                                                                                              framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_u8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEQuantizationLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(concat(datasets::Large3DShapes(), datasets::Large4DShapes()),
+                                                                                                                    framework::dataset::make("DataType", DataType::F16)),
+                                                                                                            framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_u8);
+}
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // Half
+#endif           //  __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
 TEST_SUITE_END() // QuantizationLayer
 TEST_SUITE_END() // NEON
diff --git a/tests/validation/NEON/SpaceToBatchLayer.cpp b/tests/validation/NEON/SpaceToBatchLayer.cpp
new file mode 100644
index 0000000..1d5ef06
--- /dev/null
+++ b/tests/validation/NEON/SpaceToBatchLayer.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/datasets/SpaceToBatchDataset.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/SpaceToBatchFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(NEON)
+TEST_SUITE(SpaceToBatchLayer)
+
+template <typename T>
+using NESpaceToBatchLayerFixture = SpaceToBatchLayerValidationFixture<Tensor, Accessor, NESpaceToBatchLayer, T>;
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),    // Mismatching data types
+                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),    // Wrong data type block shape
+                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U, 4U), 1, DataType::F32),    // Wrong tensor shape
+                                                     }),
+               framework::dataset::make("BlockShapeInfo",{ TensorInfo(TensorShape(2U), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(2U), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(2U), 1, DataType::F16),
+                                                       TensorInfo(TensorShape(2U), 1, DataType::S32),
+                                                     })),
+               framework::dataset::make("PaddingsShapeInfo",{ TensorInfo(TensorShape(2U, 2U), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(2U, 2U), 1, DataType::S32),
+                                                       TensorInfo(TensorShape(2U, 2U), 1, DataType::F16),
+                                                       TensorInfo(TensorShape(2U, 2U), 1, DataType::S32),
+                                                     })),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F16),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
+                                                     })),
+               framework::dataset::make("Expected", { true, false, false, false})),
+               input_info, block_shape_info, paddings_info, output_info, expected)
+{
+    bool has_error = bool(NESpaceToBatchLayer::validate(&input_info.clone()->set_is_resizable(false), &block_shape_info.clone()->set_is_resizable(false), &paddings_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false)));
+    ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS);
+}
+DATA_TEST_CASE(ValidateStatic, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32),    // Mismatching data types
+                                                       TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32),    // Negative block shapes
+                                                       TensorInfo(TensorShape(32U, 16U, 2U, 1U, 4U), 1, DataType::F32), // Wrong tensor shape
+                                                       TensorInfo(TensorShape(32U, 16U, 2U, 1U, 4U), 1, DataType::F32), // Wrong paddings
+                                                     }),
+               framework::dataset::make("BlockShapeX", { 2, 2, 2, 2, 2 })),
+               framework::dataset::make("BlockShapeY", { 2, 2, -2, 2, 2 })),
+               framework::dataset::make("PadLeft", { Size2D(0, 0), Size2D(0, 0), Size2D(0, 0), Size2D(0, 0), Size2D(3, 11) })),
+               framework::dataset::make("PadRight", { Size2D(0, 0), Size2D(0, 0), Size2D(0, 0), Size2D(0, 0), Size2D(3, 11) })),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 8U, 2U, 4U), 1, DataType::F16),
+                                                       TensorInfo(TensorShape(32U, 8U, 2U, 4U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 8U, 2U, 4U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 8U, 2U, 4U), 1, DataType::F32),
+                                                     })),
+               framework::dataset::make("Expected", { true, false, false, false, false})),
+               input_info, block_shape_x, block_shape_y, padding_left, padding_right, output_info, expected)
+{
+    bool has_error = bool(NESpaceToBatchLayer::validate(&input_info.clone()->set_is_resizable(false), block_shape_x, block_shape_y, padding_left, padding_right, &output_info.clone()->set_is_resizable(false)));
+    ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(Small, NESpaceToBatchLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallSpaceToBatchLayerDataset(), framework::dataset::make("DataType",
+                                                                                                                    DataType::F32)),
+                                                                                                            framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(Large, NESpaceToBatchLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeSpaceToBatchLayerDataset(), framework::dataset::make("DataType",
+                                                                                                                  DataType::F32)),
+                                                                                                          framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(Small, NESpaceToBatchLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallSpaceToBatchLayerDataset(), framework::dataset::make("DataType",
+                                                                                                                   DataType::F16)),
+                                                                                                           framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(Large, NESpaceToBatchLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeSpaceToBatchLayerDataset(), framework::dataset::make("DataType",
+                                                                                                                 DataType::F16)),
+                                                                                                         framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // Float
+
+TEST_SUITE_END() // SpaceToBatch
+TEST_SUITE_END() // NEON
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/Split.cpp b/tests/validation/NEON/Split.cpp
index 1736096..91b3b9a 100644
--- a/tests/validation/NEON/Split.cpp
+++ b/tests/validation/NEON/Split.cpp
@@ -57,6 +57,7 @@
 {
     std::vector<TensorInfo> outputs_info(splits);
     std::vector<ITensorInfo*> outputs_info_ptr;
+    outputs_info_ptr.reserve(splits);
     for(auto &output_info : outputs_info)
     {
         outputs_info_ptr.emplace_back(&output_info);
@@ -76,6 +77,7 @@
     Tensor                 src = create_tensor<Tensor>(shape, data_type);
     std::vector<Tensor>    dsts(splits);
     std::vector<ITensor *> dsts_ptrs;
+    dsts_ptrs.reserve(splits);
     for(auto &dst : dsts)
     {
         dsts_ptrs.emplace_back(&dst);
diff --git a/tests/validation/NEON/StackLayer.cpp b/tests/validation/NEON/StackLayer.cpp
index c18b9c8..fbe4537 100644
--- a/tests/validation/NEON/StackLayer.cpp
+++ b/tests/validation/NEON/StackLayer.cpp
@@ -70,13 +70,13 @@
 const auto shapes_1d_large = combine(datasets::Large1DShapes(), framework::dataset::make("Axis", -1, 2));
 
 /** Shapes 2D to test */
-const auto shapes_2d_large = combine(datasets::Large2DShapes(), framework::dataset::make("Axis", -2, 3));
+const auto shapes_2d_large = combine(datasets::Medium2DShapes(), framework::dataset::make("Axis", -2, 3));
 
 /** Shapes 3D to test */
-const auto shapes_3d_large = combine(datasets::Large3DShapes(), framework::dataset::make("Axis", -3, 4));
+const auto shapes_3d_large = combine(datasets::Medium3DShapes(), framework::dataset::make("Axis", -3, 4));
 
 /** Shapes 4D to test */
-const auto shapes_4d_large = combine(datasets::Large4DShapes(), framework::dataset::make("Axis", -4, 5));
+const auto shapes_4d_large = combine(datasets::Medium4DShapes(), framework::dataset::make("Axis", -4, 5));
 
 /** Configuration test */
 void validate_configuration(TensorShape shape_in, int axis, DataType data_type, int num_tensors)
diff --git a/tests/validation/NEON/UNIT/TensorAllocator.cpp b/tests/validation/NEON/UNIT/TensorAllocator.cpp
index 384a008..7ba83c1 100644
--- a/tests/validation/NEON/UNIT/TensorAllocator.cpp
+++ b/tests/validation/NEON/UNIT/TensorAllocator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,12 +26,19 @@
 #include "arm_compute/core/utils/misc/Utility.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/MemoryRegion.h"
+#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
 
 #include "support/ToolchainSupport.h"
 
+#include "tests/Globals.h"
 #include "tests/Utils.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/reference/ActivationLayer.h"
+
+#include <memory>
+#include <random>
 
 namespace arm_compute
 {
@@ -52,29 +59,30 @@
     const size_t total_size = info.total_size();
     auto         data       = support::cpp14::make_unique<uint8_t[]>(total_size);
 
-    // Negative case : Import pointer with zero size
+    // Negative case : Import nullptr
     Tensor t1;
     t1.allocator()->init(info);
-    ARM_COMPUTE_EXPECT(!bool(t1.allocator()->import_memory(data.get(), 0)), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(!bool(t1.allocator()->import_memory(nullptr)), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(t1.info()->is_resizable(), framework::LogLevel::ERRORS);
 
-    // Negative case : Import nullptr
-    Tensor t2;
-    t2.allocator()->init(info);
-    ARM_COMPUTE_EXPECT(!bool(t2.allocator()->import_memory(nullptr, total_size)), framework::LogLevel::ERRORS);
+    // Negative case : Import misaligned pointer
+    Tensor       t2;
+    const size_t required_alignment = 339;
+    t2.allocator()->init(info, required_alignment);
+    ARM_COMPUTE_EXPECT(!bool(t2.allocator()->import_memory(data.get())), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(t2.info()->is_resizable(), framework::LogLevel::ERRORS);
 
     // Negative case : Import memory to a tensor that is memory managed
     Tensor      t3;
     MemoryGroup mg;
     t3.allocator()->set_associated_memory_group(&mg);
-    ARM_COMPUTE_EXPECT(!bool(t3.allocator()->import_memory(data.get(), total_size)), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(!bool(t3.allocator()->import_memory(data.get())), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(t3.info()->is_resizable(), framework::LogLevel::ERRORS);
 
     // Positive case : Set raw pointer
     Tensor t4;
     t4.allocator()->init(info);
-    ARM_COMPUTE_EXPECT(bool(t4.allocator()->import_memory(data.get(), total_size)), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(bool(t4.allocator()->import_memory(data.get())), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(!t4.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(t4.buffer() == reinterpret_cast<uint8_t *>(data.get()), framework::LogLevel::ERRORS);
     t4.allocator()->free();
@@ -82,6 +90,57 @@
     ARM_COMPUTE_EXPECT(t4.buffer() == nullptr, framework::LogLevel::ERRORS);
 }
 
+TEST_CASE(ImportMemoryMalloc, framework::DatasetMode::ALL)
+{
+    const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);
+    const TensorShape         shape     = TensorShape(24U, 16U, 3U);
+    const DataType            data_type = DataType::F32;
+
+    // Create tensor
+    const TensorInfo info(shape, 1, data_type);
+    const size_t     required_alignment = 64;
+    Tensor           tensor;
+    tensor.allocator()->init(info, required_alignment);
+
+    // Create and configure activation function
+    NEActivationLayer act_func;
+    act_func.configure(&tensor, nullptr, act_info);
+
+    // Allocate and import tensor
+    const size_t total_size_in_elems = tensor.info()->tensor_shape().total_size();
+    const size_t total_size_in_bytes = tensor.info()->total_size();
+    size_t       space               = total_size_in_bytes + required_alignment;
+    auto         raw_data            = support::cpp14::make_unique<uint8_t[]>(space);
+
+    void *aligned_ptr = raw_data.get();
+    support::cpp11::align(required_alignment, total_size_in_bytes, aligned_ptr, space);
+
+    ARM_COMPUTE_EXPECT(bool(tensor.allocator()->import_memory(aligned_ptr)), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(!tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Fill tensor
+    std::uniform_real_distribution<float> distribution(-5.f, 5.f);
+    std::mt19937                          gen(library->seed());
+    auto                                 *typed_ptr = reinterpret_cast<float *>(aligned_ptr);
+    for(unsigned int i = 0; i < total_size_in_elems; ++i)
+    {
+        typed_ptr[i] = distribution(gen);
+    }
+
+    // Execute function and sync
+    act_func.run();
+
+    // Validate result by checking that the input has no negative values
+    for(unsigned int i = 0; i < total_size_in_elems; ++i)
+    {
+        ARM_COMPUTE_EXPECT(typed_ptr[i] >= 0, framework::LogLevel::ERRORS);
+    }
+
+    // Release resources
+    tensor.allocator()->free();
+    ARM_COMPUTE_EXPECT(tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+}
+
 TEST_CASE(AlignedAlloc, framework::DatasetMode::ALL)
 {
     // Init tensor info
diff --git a/tests/validation/NEON/WidthConcatenateLayer.cpp b/tests/validation/NEON/WidthConcatenateLayer.cpp
index 6e94e92..ba0ff1b 100644
--- a/tests/validation/NEON/WidthConcatenateLayer.cpp
+++ b/tests/validation/NEON/WidthConcatenateLayer.cpp
@@ -22,7 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/functions/NEWidthConcatenateLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
 #include "arm_compute/runtime/Tensor.h"
 #include "arm_compute/runtime/TensorAllocator.h"
 #include "tests/NEON/Accessor.h"
@@ -31,7 +31,7 @@
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
 #include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/WidthConcatenateLayerFixture.h"
+#include "tests/validation/fixtures/ConcatenateLayerFixture.h"
 
 namespace arm_compute
 {
@@ -47,19 +47,22 @@
         framework::dataset::make("InputInfo1", {  TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Mismatching data type input/output
                                                   TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Mismatching y dimension
                                                   TensorInfo(TensorShape(23U, 27U, 5U), 1, DataType::F32), // Mismatching total width
-                                                  TensorInfo(TensorShape(16U, 27U, 5U), 1, DataType::F32)
+                                                  TensorInfo(TensorShape(16U, 27U, 5U), 1, DataType::F32),
+                                                  TensorInfo(TensorShape(21U, 35U, 5U), 1, DataType::F32)
         }),
         framework::dataset::make("InputInfo2", {  TensorInfo(TensorShape(24U, 27U, 4U), 1, DataType::F32),
                                                   TensorInfo(TensorShape(52U, 27U, 5U), 1, DataType::F32),
                                                   TensorInfo(TensorShape(52U, 27U, 5U), 1, DataType::F32),
-                                                  TensorInfo(TensorShape(16U, 27U, 5U), 1, DataType::F32)
+                                                  TensorInfo(TensorShape(16U, 27U, 5U), 1, DataType::F32),
+                                                  TensorInfo(TensorShape(10U, 35U, 5U), 1, DataType::F32)
         })),
         framework::dataset::make("OutputInfo", {  TensorInfo(TensorShape(47U, 27U, 5U), 1, DataType::F16),
                                                   TensorInfo(TensorShape(75U, 12U, 5U), 1, DataType::F32),
                                                   TensorInfo(TensorShape(11U, 27U, 5U), 1, DataType::F32),
-                                                  TensorInfo(TensorShape(32U, 27U, 5U), 1, DataType::F32)
+                                                  TensorInfo(TensorShape(32U, 27U, 5U), 1, DataType::F32),
+                                                  TensorInfo(TensorShape(31U, 35U, 5U), 1, DataType::F32)
         })),
-        framework::dataset::make("Expected", { false, false, false, true })),
+        framework::dataset::make("Expected", { false, false, false, true, true })),
         input_info1, input_info2, output_info,expected)
 {
     std::vector<TensorInfo> inputs_vector_info;
@@ -67,32 +70,35 @@
     inputs_vector_info.emplace_back(std::move(input_info2));
 
     std::vector<ITensorInfo *> inputs_vector_info_raw;
+    inputs_vector_info_raw.reserve(inputs_vector_info.size());
     for(auto &input : inputs_vector_info)
     {
         inputs_vector_info_raw.emplace_back(&input);
     }
 
-    bool is_valid = bool(NEWidthConcatenateLayer::validate(inputs_vector_info_raw,
-                                                           &output_info.clone()->set_is_resizable(false)));
+    bool is_valid = bool(NEConcatenateLayer::validate(inputs_vector_info_raw, &output_info.clone()->set_is_resizable(true), 0));
     ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
 // *INDENT-ON*
-
 template <typename T>
-using NEWidthConcatenateLayerFixture = WidthConcatenateLayerValidationFixture<Tensor, ITensor, Accessor, NEWidthConcatenateLayer, T>;
+using NEWidthConcatenateLayerFixture = ConcatenateLayerValidationFixture<Tensor, ITensor, Accessor, NEConcatenateLayer, T>;
 
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEWidthConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEWidthConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
                                                                                                                    framework::dataset::make("DataType",
-                                                                                                                           DataType::F32)))
+                                                                                                                           DataType::F32)),
+                                                                                                                   framework::dataset::make("Axis", 0)))
+
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEWidthConcatenateLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::WidthConcatenateLayerShapes(), framework::dataset::make("DataType",
-                                                                                                                 DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEWidthConcatenateLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::F32)),
+                                                                                                                 framework::dataset::make("Axis", 0)))
+
 {
     // Validate output
     validate(Accessor(_target), _reference);
@@ -102,15 +108,19 @@
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEWidthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEWidthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
                                                                                                                      framework::dataset::make("DataType",
-                                                                                                                             DataType::QASYMM8)))
+                                                                                                                             DataType::QASYMM8)),
+                                                                                                                     framework::dataset::make("Axis", 0)))
+
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEWidthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::WidthConcatenateLayerShapes(), framework::dataset::make("DataType",
-                                                                                                                   DataType::QASYMM8)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEWidthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::QASYMM8)),
+                                                                                                                   framework::dataset::make("Axis", 0)))
+
 {
     // Validate output
     validate(Accessor(_target), _reference);
diff --git a/tests/validation/fixtures/ArithmeticOperationsFixture.h b/tests/validation/fixtures/ArithmeticOperationsFixture.h
index fb46a51..76f241c 100644
--- a/tests/validation/fixtures/ArithmeticOperationsFixture.h
+++ b/tests/validation/fixtures/ArithmeticOperationsFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -168,6 +168,20 @@
     }
 };
 
+template <typename TensorType, typename AccessorType, typename FunctionType>
+class ArithmeticSubtractionQuantValidationFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, qasymm8_t>
+{
+public:
+    template <typename...>
+    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy,
+               QuantizationInfo in1_qua_info, QuantizationInfo in2_qua_info, QuantizationInfo out_qua_info)
+    {
+        ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, qasymm8_t>::setup(reference::ArithmeticOperation::SUB, shape, shape,
+                                                                                                    data_type0, data_type1, output_data_type, convert_policy,
+                                                                                                    in1_qua_info, in2_qua_info, out_qua_info);
+    }
+};
+
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
 class ArithmeticSubtractionValidationFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>
 {
diff --git a/tests/validation/fixtures/WidthConcatenateLayerFixture.h b/tests/validation/fixtures/ConcatenateLayerFixture.h
similarity index 78%
rename from tests/validation/fixtures/WidthConcatenateLayerFixture.h
rename to tests/validation/fixtures/ConcatenateLayerFixture.h
index 47a03ed..d1eed63 100644
--- a/tests/validation/fixtures/WidthConcatenateLayerFixture.h
+++ b/tests/validation/fixtures/ConcatenateLayerFixture.h
@@ -33,7 +33,7 @@
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
 #include "tests/validation/Helpers.h"
-#include "tests/validation/reference/WidthConcatenateLayer.h"
+#include "tests/validation/reference/ConcatenateLayer.h"
 
 #include <random>
 
@@ -44,11 +44,11 @@
 namespace validation
 {
 template <typename TensorType, typename ITensorType, typename AccessorType, typename FunctionType, typename T>
-class WidthConcatenateLayerValidationFixture : public framework::Fixture
+class ConcatenateLayerValidationFixture : public framework::Fixture
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type)
+    void setup(TensorShape shape, DataType data_type, unsigned int axis)
     {
         // Create input shapes
         std::mt19937                    gen(library->seed());
@@ -73,17 +73,17 @@
         // Generate more shapes based on the input
         for(auto &s : shapes)
         {
-            // Randomly change the first dimension
+            // Randomly change the dimension
             if(mutate_dis(gen))
             {
                 // Decrease the dimension by a small percentage. Don't increase
                 // as that could make tensor too large.
-                s.set(0, s[0] + 2 * static_cast<int>(s[0] * change_dis(gen)));
+                s.set(axis, s[axis] + 2 * static_cast<int>(s[axis] * change_dis(gen)));
             }
         }
 
-        _target    = compute_target(shapes, qinfo, data_type);
-        _reference = compute_reference(shapes, qinfo, data_type);
+        _target    = compute_target(shapes, qinfo, data_type, axis);
+        _reference = compute_reference(shapes, qinfo, data_type, axis);
     }
 
 protected:
@@ -93,7 +93,7 @@
         library->fill_tensor_uniform(tensor, i);
     }
 
-    TensorType compute_target(std::vector<TensorShape> shapes, const std::vector<QuantizationInfo> &qinfo, DataType data_type)
+    TensorType compute_target(const std::vector<TensorShape> &shapes, const std::vector<QuantizationInfo> &qinfo, DataType data_type, unsigned int axis)
     {
         std::vector<TensorType>    srcs;
         std::vector<ITensorType *> src_ptrs;
@@ -107,13 +107,12 @@
             src_ptrs.emplace_back(&srcs.back());
         }
 
-        TensorShape dst_shape = misc::shape_calculator::calculate_width_concatenate_shape(src_ptrs);
-
-        TensorType dst = create_tensor<TensorType>(dst_shape, data_type, 1, qinfo[shapes.size()]);
+        const TensorShape dst_shape = misc::shape_calculator::calculate_concatenate_shape(src_ptrs, axis);
+        TensorType        dst       = create_tensor<TensorType>(dst_shape, data_type, 1, qinfo[shapes.size()]);
 
         // Create and configure function
-        FunctionType width_concat;
-        width_concat.configure(src_ptrs, &dst);
+        FunctionType concat;
+        concat.configure(src_ptrs, &dst, axis);
 
         for(auto &src : srcs)
         {
@@ -140,26 +139,27 @@
         }
 
         // Compute function
-        width_concat.run();
+        concat.run();
 
         return dst;
     }
 
-    SimpleTensor<T> compute_reference(std::vector<TensorShape> shapes, const std::vector<QuantizationInfo> &qinfo, DataType data_type)
+    SimpleTensor<T> compute_reference(std::vector<TensorShape> &shapes, const std::vector<QuantizationInfo> &qinfo, DataType data_type, unsigned int axis)
     {
         std::vector<SimpleTensor<T>> srcs;
+        std::vector<TensorShape *>   src_ptrs;
 
         // Create and fill tensors
         for(size_t j = 0; j < shapes.size(); ++j)
         {
             srcs.emplace_back(shapes[j], data_type, 1, qinfo[j]);
             fill(srcs.back(), j);
+            src_ptrs.emplace_back(&shapes[j]);
         }
 
-        const TensorShape dst_shape = calculate_width_concatenate_shape(shapes);
+        const TensorShape dst_shape = misc::shape_calculator::calculate_concatenate_shape(src_ptrs, axis);
         SimpleTensor<T>   dst{ dst_shape, data_type, 1, qinfo[shapes.size()] };
-
-        return reference::widthconcatenate_layer<T>(srcs, dst);
+        return reference::concatenate_layer<T>(srcs, dst, axis);
     }
 
     TensorType      _target{};
diff --git a/tests/validation/fixtures/ConvolutionLayerFixture.h b/tests/validation/fixtures/ConvolutionLayerFixture.h
index 795b9de..52fa8da 100644
--- a/tests/validation/fixtures/ConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/ConvolutionLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
diff --git a/tests/validation/fixtures/CropResizeFixture.h b/tests/validation/fixtures/CropResizeFixture.h
new file mode 100644
index 0000000..d83c411
--- /dev/null
+++ b/tests/validation/fixtures/CropResizeFixture.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_SLICE_OPERATIONS_FIXTURE
+#define ARM_COMPUTE_TEST_SLICE_OPERATIONS_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/RawLutAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/CropResize.h"
+#include "tests/validation/reference/Permute.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class CropResizeFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape src_shape, TensorShape boxes_shape, Coordinates2D crop_size, InterpolationPolicy method,
+               float extrapolation_value, bool is_outside_bounds, DataType data_type)
+    {
+        _target    = compute_target(src_shape, boxes_shape, crop_size, method, extrapolation_value, is_outside_bounds, data_type);
+        _reference = compute_reference(src_shape, boxes_shape, crop_size, method, extrapolation_value, is_outside_bounds, data_type);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        library->fill_tensor_uniform(tensor, i);
+    }
+
+    template <typename U, typename V>
+    void fill(U &&tensor, int i, V min, V max)
+    {
+        library->fill_tensor_uniform(tensor, i, min, max);
+    }
+
+    TensorType compute_target(const TensorShape &src_shape, const TensorShape &boxes_shape, const Coordinates2D &crop_size, InterpolationPolicy method,
+                              float extrapolation_value, bool is_outside_bounds, DataType data_type)
+    {
+        TensorShape dst_shape(src_shape[0], crop_size.x, crop_size.y, boxes_shape[1]);
+
+        // Create tensors
+        TensorType src       = create_tensor<TensorType>(src_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC);
+        TensorType boxes     = create_tensor<TensorType>(boxes_shape, DataType::F32);
+        TensorType boxes_ind = create_tensor<TensorType>(TensorShape(boxes_shape[1]), DataType::S32);
+        TensorType dst       = create_tensor<TensorType>(dst_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC);
+
+        // Create and configure function
+        FunctionType crop;
+        crop.configure(&src, &boxes, &boxes_ind, &dst, crop_size, method, extrapolation_value);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(boxes.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(boxes_ind.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        boxes.allocator()->allocate();
+        boxes_ind.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!boxes.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!boxes_ind.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src), 0);
+        fill(AccessorType(boxes), 1, is_outside_bounds ? 0.0f - out_of_bounds_reach : 0.0f, is_outside_bounds ? 1.0f + out_of_bounds_reach : 1.0f);
+        fill(AccessorType(boxes_ind), 2, 0, static_cast<int32_t>(src_shape[3] - 1));
+
+        // Compute function
+        crop.run();
+        return dst;
+    }
+
+    SimpleTensor<float> compute_reference(const TensorShape &src_shape, const TensorShape &boxes_shape, const Coordinates2D &crop_size, InterpolationPolicy method,
+                                          float extrapolation_value, bool is_outside_bounds, DataType data_type)
+    {
+        // Create reference
+        SimpleTensor<T>       src{ src_shape, data_type, 1, QuantizationInfo(), DataLayout::NHWC };
+        SimpleTensor<float>   boxes{ boxes_shape, DataType::F32 };
+        SimpleTensor<int32_t> boxes_ind{ TensorShape(boxes_shape[1]), DataType::S32 };
+
+        // Fill reference
+        fill(src, 0);
+        fill(boxes, 1, is_outside_bounds ? 0.0f - out_of_bounds_reach : 0.0f, is_outside_bounds ? 1.0f + out_of_bounds_reach : 1.0f);
+        fill(boxes_ind, 2, 0, static_cast<int32_t>(src.shape()[3] - 1));
+
+        SimpleTensor<float> output = reference::crop_and_resize(src, boxes, boxes_ind, crop_size, method, extrapolation_value);
+
+        SimpleTensor<float> permuted = reference::permute(output, PermutationVector(1, 2U, 0U));
+        return permuted;
+    }
+
+    constexpr static float out_of_bounds_reach = 2.0f;
+
+    TensorType          _target{};
+    SimpleTensor<float> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_SLICE_OPERATIONS_FIXTURE */
diff --git a/tests/validation/fixtures/DepthConcatenateLayerFixture.h b/tests/validation/fixtures/DepthConcatenateLayerFixture.h
deleted file mode 100644
index edeefa2..0000000
--- a/tests/validation/fixtures/DepthConcatenateLayerFixture.h
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright (c) 2017-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_DEPTHCONCATENATE_LAYER_FIXTURE
-#define ARM_COMPUTE_TEST_DEPTHCONCATENATE_LAYER_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/Helpers.h"
-#include "tests/validation/reference/DepthConcatenateLayer.h"
-
-#include <random>
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename ITensorType, typename AccessorType, typename FunctionType, typename T>
-class DepthConcatenateLayerValidationFixture : public framework::Fixture
-{
-public:
-    template <typename...>
-    void setup(TensorShape shape, DataType data_type)
-    {
-        // Create input shapes
-        std::mt19937                    gen(library->seed());
-        std::uniform_int_distribution<> num_dis(2, 4);
-        std::uniform_int_distribution<> offset_dis(0, 20);
-
-        const int num_tensors = num_dis(gen);
-
-        std::vector<TensorShape> shapes(num_tensors, shape);
-
-        // vector holding the quantization info:
-        //      the last element is the output quantization info
-        //      all other elements are the quantization info for the input tensors
-        std::vector<QuantizationInfo> qinfo(num_tensors + 1, QuantizationInfo());
-
-        for(auto &qi : qinfo)
-        {
-            qi = QuantizationInfo(1.f / 255.f, offset_dis(gen));
-        }
-
-        std::uniform_int_distribution<>  depth_dis(1, 3);
-        std::bernoulli_distribution      mutate_dis(0.5f);
-        std::uniform_real_distribution<> change_dis(-0.25f, 0.f);
-
-        // Generate more shapes based on the input
-        for(auto &s : shapes)
-        {
-            // Set the depth of the tensor
-            s.set(2, depth_dis(gen));
-
-            // Randomly change the first dimension
-            if(mutate_dis(gen))
-            {
-                // Decrease the dimension by a small percentage. Don't increase
-                // as that could make tensor too large. Also the change must be
-                // an even number. Otherwise out depth concatenate fails.
-                s.set(0, s[0] + 2 * static_cast<int>(s[0] * change_dis(gen)));
-            }
-
-            // Repeat the same as above for the second dimension
-            if(mutate_dis(gen))
-            {
-                s.set(1, s[1] + 2 * static_cast<int>(s[1] * change_dis(gen)));
-            }
-        }
-
-        _target    = compute_target(shapes, qinfo, data_type);
-        _reference = compute_reference(shapes, qinfo, data_type);
-    }
-
-protected:
-    template <typename U>
-    void fill(U &&tensor, int i)
-    {
-        library->fill_tensor_uniform(tensor, i);
-    }
-
-    TensorType compute_target(std::vector<TensorShape> shapes, const std::vector<QuantizationInfo> &qinfo, DataType data_type)
-    {
-        std::vector<TensorType>    srcs;
-        std::vector<ITensorType *> src_ptrs;
-
-        // Create tensors
-        srcs.reserve(shapes.size());
-
-        for(size_t j = 0; j < shapes.size(); ++j)
-        {
-            srcs.emplace_back(create_tensor<TensorType>(shapes[j], data_type, 1, qinfo[j]));
-            src_ptrs.emplace_back(&srcs.back());
-        }
-
-        TensorShape dst_shape = misc::shape_calculator::calculate_depth_concatenate_shape(src_ptrs);
-        TensorType  dst       = create_tensor<TensorType>(dst_shape, data_type, 1, qinfo[shapes.size()]);
-
-        // Create and configure function
-        FunctionType depth_concat;
-        depth_concat.configure(src_ptrs, &dst);
-
-        for(auto &src : srcs)
-        {
-            ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
-        }
-
-        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
-        // Allocate tensors
-        for(auto &src : srcs)
-        {
-            src.allocator()->allocate();
-            ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
-        }
-
-        dst.allocator()->allocate();
-        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
-        // Fill tensors
-        int i = 0;
-        for(auto &src : srcs)
-        {
-            fill(AccessorType(src), i++);
-        }
-
-        // Compute function
-        depth_concat.run();
-
-        return dst;
-    }
-
-    SimpleTensor<T> compute_reference(std::vector<TensorShape> shapes, const std::vector<QuantizationInfo> &qinfo, DataType data_type)
-    {
-        std::vector<SimpleTensor<T>> srcs;
-
-        // Create and fill tensors
-        for(size_t j = 0; j < shapes.size(); ++j)
-        {
-            srcs.emplace_back(shapes[j], data_type, 1, qinfo[j]);
-            fill(srcs.back(), j);
-        }
-
-        const TensorShape dst_shape = calculate_depth_concatenate_shape(shapes);
-        SimpleTensor<T>   dst{ dst_shape, data_type, 1, qinfo[shapes.size()] };
-
-        return reference::depthconcatenate_layer<T>(srcs, dst);
-    }
-
-    TensorType      _target{};
-    SimpleTensor<T> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_DEPTHCONCATENATE_LAYER_FIXTURE */
diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
index 5428154..9e6dd4b 100644
--- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -33,6 +33,7 @@
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
 #include "tests/validation/Helpers.h"
+#include "tests/validation/reference/ActivationLayer.h"
 #include "tests/validation/reference/DepthwiseConvolutionLayer.h"
 
 #include "utils/Utils.h"
@@ -55,7 +56,8 @@
 
 public:
     template <typename...>
-    void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, unsigned int depth_multiplier, DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout)
+    void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation, unsigned int depth_multiplier, DataType data_type, QuantizationInfo quantization_info,
+               DataLayout data_layout, ActivationLayerInfo act_info)
     {
         _quantization_info            = quantization_info;
         _data_type                    = data_type;
@@ -63,15 +65,15 @@
 
         TensorShape weights_shape(kernel_size.width, kernel_size.height);
 
-        const TensorInfo in_info(in_shape, 1, data_type);
-        const TensorInfo we_info(weights_shape, 1, data_type);
-        TensorShape      out_shape = compute_depthwise_convolution_shape(in_info, we_info, pad_stride_info, depth_multiplier);
+        const TensorInfo  in_info(in_shape, 1, data_type);
+        const TensorInfo  we_info(weights_shape, 1, data_type);
+        const TensorShape out_shape = compute_depthwise_convolution_shape(in_info, we_info, pad_stride_info, depth_multiplier, dilation);
 
         weights_shape.set(2, out_shape.z());
         const TensorShape biases_shape(weights_shape[2]);
 
-        _target    = compute_target(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, depth_multiplier, data_type, bias_data_type, quantization_info, data_layout);
-        _reference = compute_reference(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, depth_multiplier, data_type, bias_data_type, quantization_info);
+        _target    = compute_target(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, dilation, depth_multiplier, data_type, bias_data_type, quantization_info, data_layout, act_info);
+        _reference = compute_reference(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, dilation, depth_multiplier, data_type, bias_data_type, quantization_info, act_info);
     }
 
 protected:
@@ -104,8 +106,9 @@
         }
     }
 
-    TensorType compute_target(TensorShape input_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape output_shape, PadStrideInfo &pad_stride_info, unsigned int depth_multiplier,
-                              const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info, const DataLayout data_layout)
+    TensorType compute_target(TensorShape input_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape output_shape, PadStrideInfo &pad_stride_info, Size2D dilation,
+                              unsigned int   depth_multiplier,
+                              const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info, const DataLayout data_layout, ActivationLayerInfo act_info)
     {
         if(data_layout == DataLayout::NHWC)
         {
@@ -122,7 +125,7 @@
 
         // Create Depthwise Convolution configure function
         FunctionType dwc;
-        dwc.configure(&src, &weights, &biases, &dst, pad_stride_info, depth_multiplier);
+        dwc.configure(&src, &weights, &biases, &dst, pad_stride_info, depth_multiplier, act_info, dilation);
 
         ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -152,8 +155,8 @@
     }
 
     SimpleTensor<T> compute_reference(const TensorShape &in_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &out_shape, const PadStrideInfo &pad_stride_info,
-                                      unsigned int   depth_multiplier,
-                                      const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info)
+                                      const Size2D &dilation, unsigned int depth_multiplier,
+                                      const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info, ActivationLayerInfo act_info)
     {
         SimpleTensor<T>     src{ in_shape, data_type, 1, quantization_info };
         SimpleTensor<T>     weights{ weights_shape, data_type, 1, quantization_info };
@@ -163,7 +166,8 @@
         fill(weights, 1);
         fill(biases, 2);
 
-        return reference::depthwise_convolution(src, weights, biases, out_shape, pad_stride_info, depth_multiplier);
+        SimpleTensor<T> depth_out = reference::depthwise_convolution(src, weights, biases, out_shape, pad_stride_info, depth_multiplier, dilation);
+        return (act_info.enabled()) ? reference::activation_layer<T>(depth_out, act_info) : depth_out;
     }
 
     TensorType       _target{};
@@ -177,10 +181,11 @@
 {
 public:
     template <typename...>
-    void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, unsigned int depth_multiplier, DataType data_type, DataLayout data_layout)
+    void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation, unsigned int depth_multiplier, DataType data_type, DataLayout data_layout,
+               ActivationLayerInfo act_info)
     {
-        DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, kernel_size, pad_stride_info, depth_multiplier,
-                                                                                                            data_type, QuantizationInfo(), data_layout);
+        DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier,
+                                                                                                            data_type, QuantizationInfo(), data_layout, act_info);
     }
 };
 
@@ -189,10 +194,11 @@
 {
 public:
     template <typename...>
-    void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, unsigned int depth_multiplier, DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout)
+    void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation, unsigned int depth_multiplier, DataType data_type, QuantizationInfo quantization_info,
+               DataLayout data_layout, ActivationLayerInfo act_info)
     {
-        DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, kernel_size, pad_stride_info, depth_multiplier,
-                                                                                                            data_type, quantization_info, data_layout);
+        DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier,
+                                                                                                            data_type, quantization_info, data_layout, act_info);
     }
 };
 } // namespace validation
diff --git a/tests/validation/fixtures/DequantizationLayerFixture.h b/tests/validation/fixtures/DequantizationLayerFixture.h
index 0bf3522..2e3712d 100644
--- a/tests/validation/fixtures/DequantizationLayerFixture.h
+++ b/tests/validation/fixtures/DequantizationLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,10 +47,10 @@
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type)
+    void setup(TensorShape shape, DataType data_type, QuantizationInfo qinfo)
     {
-        _target    = compute_target(shape, data_type);
-        _reference = compute_reference(shape, data_type);
+        _target    = compute_target(shape, data_type, qinfo);
+        _reference = compute_reference(shape, data_type, qinfo);
     }
 
 protected:
@@ -60,80 +60,28 @@
         library->fill_tensor_uniform(tensor, 0);
     }
 
-    template <typename U>
-    void fill_min_max(U &&tensor)
+    TensorType compute_target(const TensorShape &shape, DataType data_type, QuantizationInfo qinfo)
     {
-        std::mt19937                          gen(library->seed());
-        std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
-
-        Window window;
-
-        window.set(0, Window::Dimension(0, tensor.shape()[0], 2));
-
-        for(unsigned int d = 1; d < tensor.shape().num_dimensions(); ++d)
-        {
-            window.set(d, Window::Dimension(0, tensor.shape()[d], 1));
-        }
-
-        execute_window_loop(window, [&](const Coordinates & id)
-        {
-            const float n1 = distribution(gen);
-            const float n2 = distribution(gen);
-
-            float min = 0.0f;
-            float max = 0.0f;
-
-            if(n1 < n2)
-            {
-                min = n1;
-                max = n2;
-            }
-            else
-            {
-                min = n2;
-                max = n1;
-            }
-
-            auto out_ptr = reinterpret_cast<float *>(tensor(id));
-            out_ptr[0]   = min;
-            out_ptr[1]   = max;
-        });
-    }
-
-    TensorType compute_target(const TensorShape &shape, DataType data_type)
-    {
-        TensorShape shape_min_max = shape;
-        shape_min_max.set(Window::DimX, 2);
-
-        // Remove Y and Z dimensions and keep the batches
-        shape_min_max.remove_dimension(1);
-        shape_min_max.remove_dimension(1);
-
         // Create tensors
-        TensorType src     = create_tensor<TensorType>(shape, data_type);
-        TensorType dst     = create_tensor<TensorType>(shape, DataType::F32);
-        TensorType min_max = create_tensor<TensorType>(shape_min_max, DataType::F32);
+        TensorType src = create_tensor<TensorType>(shape, DataType::QASYMM8, 1, qinfo);
+        TensorType dst = create_tensor<TensorType>(shape, data_type);
 
         // Create and configure function
         FunctionType dequantization_layer;
-        dequantization_layer.configure(&src, &dst, &min_max);
+        dequantization_layer.configure(&src, &dst);
 
         ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(min_max.info()->is_resizable(), framework::LogLevel::ERRORS);
 
         // Allocate tensors
         src.allocator()->allocate();
         dst.allocator()->allocate();
-        min_max.allocator()->allocate();
 
         ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!min_max.info()->is_resizable(), framework::LogLevel::ERRORS);
 
         // Fill tensors
         fill(AccessorType(src));
-        fill_min_max(AccessorType(min_max));
 
         // Compute function
         dequantization_layer.run();
@@ -141,28 +89,19 @@
         return dst;
     }
 
-    SimpleTensor<float> compute_reference(const TensorShape &shape, DataType data_type)
+    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, QuantizationInfo qinfo)
     {
-        TensorShape shape_min_max = shape;
-        shape_min_max.set(Window::DimX, 2);
-
-        // Remove Y and Z dimensions and keep the batches
-        shape_min_max.remove_dimension(1);
-        shape_min_max.remove_dimension(1);
-
         // Create reference
-        SimpleTensor<T>     src{ shape, data_type };
-        SimpleTensor<float> min_max{ shape_min_max, data_type };
+        SimpleTensor<uint8_t> src{ shape, DataType::QASYMM8, 1, qinfo };
 
         // Fill reference
         fill(src);
-        fill_min_max(min_max);
 
-        return reference::dequantization_layer<T>(src, min_max);
+        return reference::dequantization_layer<T>(src);
     }
 
-    TensorType          _target{};
-    SimpleTensor<float> _reference{};
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
 };
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/fixtures/FFTFixture.h b/tests/validation/fixtures/FFTFixture.h
new file mode 100644
index 0000000..1aaa596
--- /dev/null
+++ b/tests/validation/fixtures/FFTFixture.h
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_FFT_FIXTURE
+#define ARM_COMPUTE_TEST_FFT_FIXTURE
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/FunctionDescriptors.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/ActivationLayer.h"
+#include "tests/validation/reference/ConvolutionLayer.h"
+#include "tests/validation/reference/DFT.h"
+
+#include <random>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename InfoType, typename T>
+class FFTValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type)
+    {
+        _target    = compute_target(shape, data_type);
+        _reference = compute_reference(shape, data_type);
+        ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(_target.info()->tensor_shape(), _reference.shape());
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor)
+    {
+        std::uniform_real_distribution<float> distribution(-5.f, 5.f);
+        library->fill(tensor, distribution, 0);
+    }
+
+    TensorType compute_target(const TensorShape &shape, DataType data_type)
+    {
+        // Create tensors
+        TensorType src = create_tensor<TensorType>(shape, data_type, 2);
+        TensorType dst = create_tensor<TensorType>(shape, data_type, 2);
+
+        // Create and configure function
+        FunctionType fft;
+        fft.configure(&src, &dst, InfoType());
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src));
+
+        // Compute function
+        fft.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type)
+    {
+        // Create reference
+        SimpleTensor<T> src{ shape, data_type, 2 };
+
+        // Fill reference
+        fill(src);
+        if(std::is_same<InfoType, FFT1DInfo>::value)
+        {
+            return reference::dft_1d(src, reference::FFTDirection::Forward);
+        }
+        else
+        {
+            return reference::dft_2d(src, reference::FFTDirection::Forward);
+        }
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class FFTConvolutionValidationGenericFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation,
+               DataType data_type, DataLayout data_layout, ActivationLayerInfo act_info)
+    {
+        _data_type   = data_type;
+        _data_layout = data_layout;
+
+        _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, dilation, act_info);
+        _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, dilation, act_info);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        switch(tensor.data_type())
+        {
+            case DataType::F32:
+            {
+                std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+                library->fill(tensor, distribution, i);
+                break;
+            }
+            default:
+                library->fill_tensor_uniform(tensor, i);
+        }
+    }
+
+    TensorType compute_target(TensorShape input_shape, TensorShape weights_shape, const TensorShape &bias_shape, TensorShape output_shape, const PadStrideInfo &info,
+                              const Size2D &dilation, const ActivationLayerInfo act_info)
+    {
+        ARM_COMPUTE_UNUSED(dilation);
+        ARM_COMPUTE_ERROR_ON((input_shape[2] % weights_shape[2]) != 0);
+
+        if(_data_layout == DataLayout::NHWC)
+        {
+            permute(input_shape, PermutationVector(2U, 0U, 1U));
+            permute(weights_shape, PermutationVector(2U, 0U, 1U));
+            permute(output_shape, PermutationVector(2U, 0U, 1U));
+        }
+
+        // Create tensors
+        TensorType src     = create_tensor<TensorType>(input_shape, _data_type, 1, QuantizationInfo(), _data_layout);
+        TensorType weights = create_tensor<TensorType>(weights_shape, _data_type, 1, QuantizationInfo(), _data_layout);
+        TensorType bias    = create_tensor<TensorType>(bias_shape, _data_type, 1, QuantizationInfo(), _data_layout);
+        TensorType dst     = create_tensor<TensorType>(output_shape, _data_type, 1, QuantizationInfo(), _data_layout);
+
+        // Create and configure function
+        FunctionType conv;
+        conv.configure(&src, &weights, &bias, &dst, info, act_info);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        weights.allocator()->allocate();
+        bias.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src), 0);
+        fill(AccessorType(weights), 1);
+        fill(AccessorType(bias), 2);
+
+        // Compute convolution function
+        conv.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info,
+                                      const Size2D &dilation, const ActivationLayerInfo act_info)
+    {
+        ARM_COMPUTE_ERROR_ON((input_shape[2] % weights_shape[2]) != 0);
+
+        // Create reference
+        SimpleTensor<T> src{ input_shape, _data_type, 1 };
+        SimpleTensor<T> weights{ weights_shape, _data_type, 1 };
+        SimpleTensor<T> bias{ bias_shape, _data_type, 1 };
+
+        // Fill reference
+        fill(src, 0);
+        fill(weights, 1);
+        fill(bias, 2);
+
+        return (act_info.enabled()) ? reference::activation_layer<T>(reference::convolution_layer<T>(src, weights, bias, output_shape, info, dilation), act_info) : reference::convolution_layer<T>(src,
+                weights, bias, output_shape, info, dilation);
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+    DataType        _data_type{};
+    DataLayout      _data_layout{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class FFTConvolutionValidationFixture : public FFTConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation,
+               DataType data_type, DataLayout data_layout, ActivationLayerInfo act_info)
+    {
+        FFTConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation,
+                                                                                                 data_type, data_layout, act_info);
+    }
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_FFT_FIXTURE */
diff --git a/tests/validation/fixtures/GEMMFixture.h b/tests/validation/fixtures/GEMMFixture.h
index a6a3b67..77d2ca6 100644
--- a/tests/validation/fixtures/GEMMFixture.h
+++ b/tests/validation/fixtures/GEMMFixture.h
@@ -383,6 +383,220 @@
     TensorType      _target{};
     SimpleTensor<T> _reference{};
 };
+
+template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSFunctionType, typename GEMMFunctionType>
+class GEMMMatrixMultiplyReshapedOnlyRHSValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0,
+               bool interleave_rhs, bool transpose_rhs, DataType data_type, float alpha)
+    {
+        GEMMLHSMatrixInfo lhs_info;
+        lhs_info.m0 = m0;
+        lhs_info.k0 = k0;
+
+        GEMMRHSMatrixInfo rhs_info;
+        rhs_info.n0         = n0;
+        rhs_info.k0         = k0;
+        rhs_info.h0         = h0;
+        rhs_info.interleave = interleave_rhs;
+        rhs_info.transpose  = transpose_rhs;
+
+        // Set the tensor shapes for LHS and RHS matrices
+        const TensorShape lhs_shape(k, m, batch_size);
+        const TensorShape rhs_shape(n, k, batch_size);
+
+        _target    = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info, data_type, alpha);
+        _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+        library->fill(tensor, distribution, i);
+
+        // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
+        std::uniform_real_distribution<> distribution_inf(std::numeric_limits<float>::infinity(), std::numeric_limits<float>::infinity());
+        library->fill_borders_with_garbage(tensor, distribution_inf, i);
+    }
+
+    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, DataType data_type, float alpha)
+    {
+        // Create tensors
+        TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
+        TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
+        TensorType rhs_reshaped;
+        TensorType dst;
+
+        const unsigned int M = lhs_shape[1];
+        const unsigned int N = rhs_shape[0];
+        const unsigned int K = lhs_shape[0];
+
+        // The output tensor will be auto-initialized within the function
+
+        // Create and configure function
+        ReshapeRHSFunctionType reshape_rhs;
+        GEMMFunctionType       gemm;
+        reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info);
+        gemm.configure(&lhs, &rhs_reshaped, &dst, alpha, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K));
+
+        ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        lhs.allocator()->allocate();
+        rhs.allocator()->allocate();
+        rhs_reshaped.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(lhs), 0);
+        fill(AccessorType(rhs), 1);
+
+        // Compute GEMM
+        reshape_rhs.run();
+        gemm.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha)
+    {
+        TensorShape dst_shape = lhs_shape;
+        dst_shape[0]          = rhs_shape[0];
+        dst_shape[1]          = lhs_shape[1];
+
+        // Create reference
+        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
+        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
+        SimpleTensor<T> c{ dst_shape, data_type, 1 };
+
+        // Fill reference
+        fill(lhs, 0);
+        fill(rhs, 1);
+
+        return reference::gemm<T>(lhs, rhs, c, alpha, 0.0f);
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+};
+
+template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSFunctionType, typename GEMMFunctionType>
+class GEMMMatrixMultiplyReshapedOnlyRHS3DValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0,
+               bool interleave_rhs, bool transpose_rhs, DataType data_type, float alpha)
+    {
+        GEMMLHSMatrixInfo lhs_info;
+        lhs_info.m0 = m0;
+        lhs_info.k0 = k0;
+
+        GEMMRHSMatrixInfo rhs_info;
+        rhs_info.n0         = n0;
+        rhs_info.k0         = k0;
+        rhs_info.h0         = h0;
+        rhs_info.interleave = interleave_rhs;
+        rhs_info.transpose  = transpose_rhs;
+
+        // In case of GEMM3D, m is the product between m_w and m_h
+        const unsigned int m = m_w * m_h;
+
+        // Set the tensor shapes for LHS and RHS matrices
+        const TensorShape lhs_shape(k, m, batch_size);
+        const TensorShape rhs_shape(n, k, batch_size);
+
+        _target    = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info, data_type, alpha, m_h);
+        _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, m_h);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+        library->fill(tensor, distribution, i);
+    }
+
+    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, DataType data_type, float alpha,
+                              unsigned int m_h)
+    {
+        // Create tensors
+        TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
+        TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
+        TensorType rhs_reshaped;
+        TensorType dst;
+
+        const unsigned int M = lhs_shape[1];
+        const unsigned int N = rhs_shape[0];
+        const unsigned int K = lhs_shape[0];
+
+        // The output tensor will be auto-initialized within the function
+
+        // Create and configure function
+        ReshapeRHSFunctionType reshape_rhs;
+        GEMMFunctionType       gemm;
+        reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info);
+        gemm.configure(&lhs, &rhs_reshaped, &dst, alpha, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, m_h));
+
+        ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        lhs.allocator()->allocate();
+        rhs.allocator()->allocate();
+        rhs_reshaped.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(lhs), 0);
+        fill(AccessorType(rhs), 1);
+
+        // Compute GEMM
+        reshape_rhs.run();
+        gemm.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, unsigned int m_h)
+    {
+        TensorShape dst_shape = lhs_shape;
+        dst_shape.set(0, rhs_shape[0]);
+        dst_shape.set(1, lhs_shape[1] / m_h);
+        dst_shape.set(2, m_h);
+        dst_shape.set(3, lhs_shape[2]);
+
+        // Create reference
+        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
+        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
+        SimpleTensor<T> c{ dst_shape, data_type, 1 };
+
+        // Fill reference
+        fill(lhs, 0);
+        fill(rhs, 1);
+
+        return reference::gemm<T>(lhs, rhs, c, alpha, 0.0f);
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+};
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/fixtures/GEMMLowpFixture.h b/tests/validation/fixtures/GEMMLowpFixture.h
index 836f8ed..5793ebd 100644
--- a/tests/validation/fixtures/GEMMLowpFixture.h
+++ b/tests/validation/fixtures/GEMMLowpFixture.h
@@ -42,88 +42,166 @@
 {
 namespace validation
 {
+namespace
+{
+template <typename U>
+void fill(U &&tensor, int i)
+{
+    // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
+    std::uniform_int_distribution<> distribution(1, 254);
+    library->fill(tensor, distribution, i);
+}
+
+template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d, bool reinterpret_output_as_3d, typename OutputType, bool is_fused = false>
+TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
+                                   GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo())
+{
+    // Create tensors
+    TensorType a      = create_tensor<TensorType>(shape_a, DataType::QASYMM8, 1);
+    TensorType b      = create_tensor<TensorType>(shape_b, DataType::QASYMM8, 1);
+    TensorType output = create_tensor<TensorType>(shape_output, output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : DataType::QASYMM8, 1);
+
+    a.info()->set_quantization_info(QuantizationInfo(1.0f / 255, a_offset));
+    b.info()->set_quantization_info(QuantizationInfo(1.0f / 255, b_offset));
+
+    TensorType bias;
+    if(is_fused)
+    {
+        TensorShape bias_shape(shape_b[0]);
+        bias = create_tensor<TensorType>(bias_shape, DataType::S32, 1);
+    }
+
+    // Create and configure function
+    // The GEMMinfo includes the values of the depth in case of reinterpreted 3d input/output
+    FunctionType gemmlowp;
+    // TODO (COMPMID-1672) - Extending the test to validate add bias in offset contribution
+    gemmlowp.configure(&a, &b, is_fused ? &bias : nullptr, &output, GEMMInfo(false, false, false, (reinterpret_output_as_3d ? shape_output[2] : 0), reinterpret_input_as_3d, false, output_stage));
+
+    ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(output.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Allocate tensors
+    a.allocator()->allocate();
+    b.allocator()->allocate();
+    output.allocator()->allocate();
+
+    ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(!output.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Fill tensors
+    fill(AccessorType(a), 0);
+    fill(AccessorType(b), 1);
+
+    if(is_fused)
+    {
+        ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+        bias.allocator()->allocate();
+        ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+        fill(AccessorType(bias), 2);
+    }
+
+    // Compute GEMM function
+    gemmlowp.run();
+    return output;
+}
+
+template <bool        reinterpret_input_as_3d>
+SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset)
+{
+    TensorShape shape_a_to_use = shape_a;
+    if(reinterpret_input_as_3d)
+    {
+        // Collapse the second and third dimension if the input is 3D
+        shape_a_to_use.collapse(2U, 1U);
+    }
+
+    // Create reference
+    SimpleTensor<uint8_t> a{ shape_a_to_use, DataType::QASYMM8, 1 };
+    SimpleTensor<uint8_t> b{ shape_b, DataType::QASYMM8, 1 };
+
+    // Fill reference
+    fill(a, 0);
+    fill(b, 1);
+
+    return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(a, b, shape_output, a_offset, b_offset);
+}
+}
+
 template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false>
 class GEMMLowpMatrixMultiplyCoreValidationFixture : public framework::Fixture
 {
 public:
     template <typename...>
-    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, int32_t a_offset, int32_t b_offset)
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset)
     {
-        _target    = compute_target(shape_a, shape_b, shape_c, a_offset, b_offset);
-        _reference = compute_reference(shape_a, shape_b, shape_c, a_offset, b_offset);
+        _target    = compute_target(shape_a, shape_b, shape_output, a_offset, b_offset);
+        _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, b_offset);
     }
 
 protected:
-    template <typename U>
-    void fill(U &&tensor, int i)
+    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset)
     {
-        // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
-        std::uniform_int_distribution<> distribution(1, 254);
-        library->fill(tensor, distribution, i);
+        return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t>(shape_a, shape_b, shape_output, a_offset, b_offset);
     }
 
-    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c, int32_t a_offset, int32_t b_offset)
+    SimpleTensor<int32_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset)
     {
-        // Create tensors
-        TensorType a = create_tensor<TensorType>(shape_a, DataType::QASYMM8, 1);
-        TensorType b = create_tensor<TensorType>(shape_b, DataType::QASYMM8, 1);
-        TensorType c = create_tensor<TensorType>(shape_c, DataType::S32, 1);
-
-        a.info()->set_quantization_info(QuantizationInfo(1.0f / 255, a_offset));
-        b.info()->set_quantization_info(QuantizationInfo(1.0f / 255, b_offset));
-
-        // Create and configure function
-        // The GEMMinfo includes the values of the depth in case of reinterpreted 3d input/output
-        FunctionType gemmlowp;
-        // TODO (COMPMID-1672) - Extending the test to validate add bias in offset contribution
-        gemmlowp.configure(&a, &b, nullptr, &c, GEMMInfo(false, false, false, (reinterpret_output_as_3d ? shape_c[2] : 0), reinterpret_input_as_3d));
-
-        ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS);
-
-        // Allocate tensors
-        a.allocator()->allocate();
-        b.allocator()->allocate();
-        c.allocator()->allocate();
-
-        ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!c.info()->is_resizable(), framework::LogLevel::ERRORS);
-
-        // Fill tensors
-        fill(AccessorType(a), 0);
-        fill(AccessorType(b), 1);
-
-        // Compute GEMM function
-        gemmlowp.run();
-        return c;
-    }
-
-    SimpleTensor<int32_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c, int32_t a_offset, int32_t b_offset)
-    {
-        TensorShape shape_a_to_use = shape_a;
-        if(reinterpret_input_as_3d)
-        {
-            // Collapse the second and third dimension if the input is 3D
-            shape_a_to_use.collapse(2U, 1U);
-        }
-
-        // Create reference
-        SimpleTensor<uint8_t> a{ shape_a_to_use, DataType::QASYMM8, 1 };
-        SimpleTensor<uint8_t> b{ shape_b, DataType::QASYMM8, 1 };
-
-        // Fill reference
-        fill(a, 0);
-        fill(b, 1);
-
-        return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(a, b, shape_c, a_offset, b_offset);
+        return compute_gemmlowp_reference<reinterpret_input_as_3d>(shape_a, shape_b, shape_output, a_offset, b_offset);
     }
 
     TensorType            _target{};
     SimpleTensor<int32_t> _reference{};
 };
 
+template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false>
+class GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage)
+    {
+        ARM_COMPUTE_EXPECT(output_stage.type != GEMMLowpOutputStageType::NONE, framework::LogLevel::ERRORS);
+        _target    = compute_target(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage);
+        _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage);
+    }
+
+protected:
+    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage)
+    {
+        return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, qasymm8_t, true>(shape_a, shape_b, shape_output, a_offset, b_offset,
+                output_stage);
+    }
+
+    SimpleTensor<qasymm8_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
+                                              GEMMLowpOutputStageInfo output_stage)
+    {
+        SimpleTensor<int32_t> output = compute_gemmlowp_reference<reinterpret_input_as_3d>(shape_a, shape_b, shape_output, a_offset, b_offset);
+
+        TensorShape           bias_shape(shape_b[0]);
+        SimpleTensor<int32_t> bias{ bias_shape, DataType::S32, 1 };
+        fill(bias, 2);
+
+        switch(output_stage.type)
+        {
+            case GEMMLowpOutputStageType::QUANTIZE_DOWN:
+                return reference::gemmlowp_quantize_down_int32_to_uint8_scale<int32_t>(output, bias,
+                                                                                       output_stage.gemmlowp_offset, output_stage.gemmlowp_multiplier, output_stage.gemmlowp_shift, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound);
+                break;
+            case GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT:
+                return reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint<int32_t>(output, bias,
+                                                                                                     output_stage.gemmlowp_multiplier, output_stage.gemmlowp_shift, output_stage.gemmlowp_offset, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound);
+                break;
+            default:
+                ARM_COMPUTE_ERROR("Not Supported!");
+        }
+    }
+
+    TensorType              _target{};
+    SimpleTensor<qasymm8_t> _reference{};
+};
+
 template <typename TensorType, typename AccessorType, typename FunctionType>
 class GEMMLowpQuantizeDownInt32ToUint8ScaleValidationFixture : public framework::Fixture
 {
@@ -533,7 +611,215 @@
     TensorType            _target{};
     SimpleTensor<int32_t> _reference{};
 };
+
+template <typename TensorType, typename AccessorType, typename ReshapeRHSFunctionType, typename GEMMFunctionType>
+class GEMMLowpMatrixMultiplyReshapedOnlyRHSValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0, bool interleave_rhs, bool transpose_rhs)
+    {
+        GEMMLHSMatrixInfo lhs_info;
+        lhs_info.m0 = m0;
+        lhs_info.k0 = k0;
+
+        GEMMRHSMatrixInfo rhs_info;
+        rhs_info.n0         = n0;
+        rhs_info.k0         = k0;
+        rhs_info.h0         = h0;
+        rhs_info.interleave = interleave_rhs;
+        rhs_info.transpose  = transpose_rhs;
+
+        // Set the tensor shapes for LHS and RHS matrices
+        const TensorShape lhs_shape(k, m, batch_size);
+        const TensorShape rhs_shape(n, k, batch_size);
+
+        _target    = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info);
+        _reference = compute_reference(lhs_shape, rhs_shape);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
+        std::uniform_int_distribution<> distribution(1, 254);
+        library->fill(tensor, distribution, i);
+    }
+
+    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info)
+    {
+        // Create tensors
+        TensorType lhs = create_tensor<TensorType>(lhs_shape, DataType::QASYMM8, 1);
+        TensorType rhs = create_tensor<TensorType>(rhs_shape, DataType::QASYMM8, 1);
+        TensorType rhs_reshaped;
+        TensorType dst;
+
+        const unsigned int M = lhs_shape[1];
+        const unsigned int N = rhs_shape[0];
+        const unsigned int K = lhs_shape[0];
+
+        // The output tensor will be auto-initialized within the function
+
+        // Create and configure function
+        ReshapeRHSFunctionType reshape_rhs;
+        GEMMFunctionType       gemm;
+        reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info);
+        gemm.configure(&lhs, &rhs_reshaped, &dst, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K));
+
+        ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        lhs.allocator()->allocate();
+        rhs.allocator()->allocate();
+        rhs_reshaped.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(lhs), 0);
+        fill(AccessorType(rhs), 1);
+
+        // Compute GEMM
+        reshape_rhs.run();
+        gemm.run();
+
+        return dst;
+    }
+
+    SimpleTensor<int32_t> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape)
+    {
+        TensorShape dst_shape = lhs_shape;
+        dst_shape[0]          = rhs_shape[0];
+        dst_shape[1]          = lhs_shape[1];
+
+        // Create reference
+        SimpleTensor<uint8_t> lhs{ lhs_shape, DataType::QASYMM8, 1 };
+        SimpleTensor<uint8_t> rhs{ rhs_shape, DataType::QASYMM8, 1 };
+
+        // Fill reference
+        fill(lhs, 0);
+        fill(rhs, 1);
+
+        return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(lhs, rhs, dst_shape, 0, 0);
+    }
+
+    TensorType            _target{};
+    SimpleTensor<int32_t> _reference{};
+};
+
+template <typename TensorType, typename AccessorType, typename ReshapeRHSFunctionType, typename GEMMFunctionType>
+class GEMMLowpMatrixMultiplyReshapedOnlyRHS3DValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0,
+               bool interleave_rhs, bool transpose_rhs)
+    {
+        GEMMLHSMatrixInfo lhs_info;
+        lhs_info.m0 = m0;
+        lhs_info.k0 = k0;
+
+        GEMMRHSMatrixInfo rhs_info;
+        rhs_info.n0         = n0;
+        rhs_info.k0         = k0;
+        rhs_info.h0         = h0;
+        rhs_info.interleave = interleave_rhs;
+        rhs_info.transpose  = transpose_rhs;
+
+        // In case of GEMM3D, m is the product between m_w and m_h
+        const unsigned int m = m_w * m_h;
+
+        // Set the tensor shapes for LHS and RHS matrices
+        const TensorShape lhs_shape(k, m, batch_size);
+        const TensorShape rhs_shape(n, k, batch_size);
+
+        _target    = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info, m_h);
+        _reference = compute_reference(lhs_shape, rhs_shape, m_h);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
+        std::uniform_int_distribution<> distribution(1, 254);
+        library->fill(tensor, distribution, i);
+    }
+
+    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, unsigned int m_h)
+    {
+        // Create tensors
+        TensorType lhs = create_tensor<TensorType>(lhs_shape, DataType::QASYMM8, 1);
+        TensorType rhs = create_tensor<TensorType>(rhs_shape, DataType::QASYMM8, 1);
+        TensorType rhs_reshaped;
+        TensorType dst;
+
+        const unsigned int M = lhs_shape[1];
+        const unsigned int N = rhs_shape[0];
+        const unsigned int K = lhs_shape[0];
+
+        // The output tensor will be auto-initialized within the function
+
+        // Create and configure function
+        ReshapeRHSFunctionType reshape_rhs;
+        GEMMFunctionType       gemm;
+        reshape_rhs.configure(&rhs, &rhs_reshaped, rhs_info);
+        gemm.configure(&lhs, &rhs_reshaped, &dst, lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, m_h));
+
+        ARM_COMPUTE_EXPECT(lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        lhs.allocator()->allocate();
+        rhs.allocator()->allocate();
+        rhs_reshaped.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!lhs.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!rhs.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!rhs_reshaped.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(lhs), 0);
+        fill(AccessorType(rhs), 1);
+
+        // Compute GEMM
+        reshape_rhs.run();
+        gemm.run();
+
+        return dst;
+    }
+
+    SimpleTensor<int32_t> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, unsigned int m_h)
+    {
+        TensorShape dst_shape = lhs_shape;
+        dst_shape.set(0, rhs_shape[0]);
+        dst_shape.set(1, lhs_shape[1] / m_h);
+        dst_shape.set(2, m_h);
+        dst_shape.set(3, lhs_shape[2]);
+
+        // Create reference
+        SimpleTensor<uint8_t> lhs{ lhs_shape, DataType::QASYMM8, 1 };
+        SimpleTensor<uint8_t> rhs{ rhs_shape, DataType::QASYMM8, 1 };
+
+        // Fill reference
+        fill(lhs, 0);
+        fill(rhs, 1);
+
+        return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(lhs, rhs, dst_shape, 0, 0);
+    }
+
+    TensorType            _target{};
+    SimpleTensor<int32_t> _reference{};
+};
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE */
\ No newline at end of file
+#endif /* ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE */
diff --git a/tests/validation/fixtures/LSTMLayerFixture.h b/tests/validation/fixtures/LSTMLayerFixture.h
index b30f1e5..2cf83b8 100644
--- a/tests/validation/fixtures/LSTMLayerFixture.h
+++ b/tests/validation/fixtures/LSTMLayerFixture.h
@@ -29,11 +29,11 @@
 #include "tests/framework/Fixture.h"
 #include "tests/validation/reference/ActivationLayer.h"
 #include "tests/validation/reference/ArithmeticOperations.h"
+#include "tests/validation/reference/ConcatenateLayer.h"
 #include "tests/validation/reference/FullyConnectedLayer.h"
 #include "tests/validation/reference/GEMM.h"
 #include "tests/validation/reference/PixelWiseMultiplication.h"
 #include "tests/validation/reference/Transpose.h"
-#include "tests/validation/reference/WidthConcatenateLayer.h"
 
 namespace arm_compute
 {
@@ -415,7 +415,7 @@
         scratch_inputs.emplace_back(std::move(cell_state_out));
         scratch_inputs.emplace_back(std::move(forget_gate));
         scratch_inputs.emplace_back(std::move(output));
-        scratch            = reference::widthconcatenate_layer(scratch_inputs, scratch);
+        scratch            = reference::concatenate_layer(scratch_inputs, scratch, Window::DimX);
         _reference_scratch = std::move(scratch);
         return output_state_out;
     }
diff --git a/tests/validation/fixtures/PadLayerFixture.h b/tests/validation/fixtures/PadLayerFixture.h
index 839313a..3538cab 100644
--- a/tests/validation/fixtures/PadLayerFixture.h
+++ b/tests/validation/fixtures/PadLayerFixture.h
@@ -45,30 +45,54 @@
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type, const PaddingList &padding)
+    void setup(TensorShape shape, DataType data_type, const PaddingList &padding, const PaddingMode mode)
     {
-        _target    = compute_target(shape, data_type, padding);
-        _reference = compute_reference(shape, data_type, padding);
+        PaddingList clamped_padding = padding;
+        if(mode != PaddingMode::CONSTANT)
+        {
+            // Clamp padding to prevent applying more than is possible.
+            for(uint32_t i = 0; i < padding.size(); ++i)
+            {
+                if(mode == PaddingMode::REFLECT)
+                {
+                    clamped_padding[i].first  = std::min(static_cast<uint64_t>(padding[i].first), static_cast<uint64_t>(shape[i] - 1));
+                    clamped_padding[i].second = std::min(static_cast<uint64_t>(padding[i].second), static_cast<uint64_t>(shape[i] - 1));
+                }
+                else
+                {
+                    clamped_padding[i].first  = std::min(static_cast<uint64_t>(padding[i].first), static_cast<uint64_t>(shape[i]));
+                    clamped_padding[i].second = std::min(static_cast<uint64_t>(padding[i].second), static_cast<uint64_t>(shape[i]));
+                }
+            }
+        }
+        _target    = compute_target(shape, data_type, clamped_padding, mode);
+        _reference = compute_reference(shape, data_type, clamped_padding, mode);
     }
 
 protected:
     template <typename U>
-    void fill(U &&tensor)
+    void fill(U &&tensor, int i)
     {
-        library->fill_tensor_uniform(tensor, 0);
+        library->fill_tensor_uniform(tensor, i);
     }
 
     TensorType compute_target(const TensorShape &shape,
                               DataType           data_type,
-                              const PaddingList &paddings)
+                              const PaddingList &paddings,
+                              const PaddingMode  mode)
     {
         // Create tensors
         TensorType src = create_tensor<TensorType>(shape, data_type);
         TensorType dst;
 
+        TensorType const_val = create_tensor<TensorType>(TensorShape(1), data_type);
+        const_val.allocator()->allocate();
+        fill(AccessorType(const_val), 1);
+        T const_value = *static_cast<T *>(AccessorType(const_val)(Coordinates(0)));
+
         // Create and configure function
         FunctionType padding;
-        padding.configure(&src, &dst, paddings);
+        padding.configure(&src, &dst, paddings, const_value, mode);
 
         ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -81,7 +105,7 @@
         ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
         // Fill tensors
-        fill(AccessorType(src));
+        fill(AccessorType(src), 0);
 
         // Compute function
         padding.run();
@@ -90,15 +114,17 @@
     }
 
     SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type,
-                                      const PaddingList &paddings)
+                                      const PaddingList &paddings, const PaddingMode mode)
     {
         // Create reference tensor
         SimpleTensor<T> src{ shape, data_type };
+        SimpleTensor<T> const_val{ TensorShape(1), data_type };
 
         // Fill reference tensor
-        fill(src);
+        fill(src, 0);
+        fill(const_val, 1);
 
-        return reference::pad_layer(src, paddings);
+        return reference::pad_layer(src, paddings, const_val[0], mode);
     }
 
     TensorType      _target{};
diff --git a/tests/validation/fixtures/PixelWiseMultiplicationFixture.h b/tests/validation/fixtures/PixelWiseMultiplicationFixture.h
index 9927b75..efdf5d0 100644
--- a/tests/validation/fixtures/PixelWiseMultiplicationFixture.h
+++ b/tests/validation/fixtures/PixelWiseMultiplicationFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -122,6 +122,19 @@
 };
 
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
+class PixelWiseMultiplicationQuatizedValidationFixture : public PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>
+{
+public:
+    template <typename...>
+    void setup(const TensorShape &shape, DataType dt_in1, DataType dt_in2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy,
+               QuantizationInfo in1_qua_info, QuantizationInfo in2_qua_info, QuantizationInfo out_qua_info)
+    {
+        PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, shape, dt_in1, dt_in2, scale, convert_policy, rounding_policy,
+                                                                                                               in1_qua_info, in2_qua_info, out_qua_info);
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
 class PixelWiseMultiplicationValidationFixture : public PixelWiseMultiplicationGenericValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>
 {
 public:
diff --git a/tests/validation/fixtures/PoolingLayerFixture.h b/tests/validation/fixtures/PoolingLayerFixture.h
index 3e34f98..1813ef4 100644
--- a/tests/validation/fixtures/PoolingLayerFixture.h
+++ b/tests/validation/fixtures/PoolingLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,6 +26,7 @@
 
 #include "arm_compute/core/TensorShape.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/runtime/Tensor.h"
 #include "tests/AssetsLibrary.h"
 #include "tests/Globals.h"
@@ -47,13 +48,16 @@
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, PoolingLayerInfo pool_info, DataType data_type, DataLayout data_layout, QuantizationInfo quantization_info)
+    void setup(TensorShape shape, PoolingLayerInfo pool_info, DataType data_type, DataLayout data_layout)
     {
-        _quantization_info = quantization_info;
-        _pool_info         = pool_info;
+        std::mt19937                    gen(library->seed());
+        std::uniform_int_distribution<> offset_dis(0, 20);
+        const QuantizationInfo          input_qinfo(1.f / 255.f, offset_dis(gen));
+        const QuantizationInfo          output_qinfo(1.f / 255.f, offset_dis(gen));
 
-        _target    = compute_target(shape, pool_info, data_type, data_layout, quantization_info);
-        _reference = compute_reference(shape, pool_info, data_type, quantization_info);
+        _pool_info = pool_info;
+        _target    = compute_target(shape, pool_info, data_type, data_layout, input_qinfo, output_qinfo);
+        _reference = compute_reference(shape, pool_info, data_type, input_qinfo, output_qinfo);
     }
 
 protected:
@@ -72,7 +76,7 @@
     }
 
     TensorType compute_target(TensorShape shape, PoolingLayerInfo info,
-                              DataType data_type, DataLayout data_layout, QuantizationInfo quantization_info)
+                              DataType data_type, DataLayout data_layout, QuantizationInfo input_qinfo, QuantizationInfo output_qinfo)
     {
         // Change shape in case of NHWC.
         if(data_layout == DataLayout::NHWC)
@@ -81,8 +85,9 @@
         }
 
         // Create tensors
-        TensorType src = create_tensor<TensorType>(shape, data_type, 1, quantization_info, data_layout);
-        TensorType dst;
+        TensorType        src       = create_tensor<TensorType>(shape, data_type, 1, input_qinfo, data_layout);
+        const TensorShape dst_shape = misc::shape_calculator::compute_pool_shape(*(src.info()), info);
+        TensorType        dst       = create_tensor<TensorType>(dst_shape, data_type, 1, output_qinfo, data_layout);
 
         // Create and configure function
         FunctionType pool_layer;
@@ -107,21 +112,19 @@
         return dst;
     }
 
-    SimpleTensor<T> compute_reference(const TensorShape &shape, PoolingLayerInfo info,
-                                      DataType data_type, QuantizationInfo quantization_info)
+    SimpleTensor<T> compute_reference(const TensorShape &shape, PoolingLayerInfo info, DataType data_type, QuantizationInfo input_qinfo, QuantizationInfo output_qinfo)
     {
         // Create reference
-        SimpleTensor<T> src{ shape, data_type, 1, quantization_info };
+        SimpleTensor<T> src{ shape, data_type, 1, input_qinfo };
 
         // Fill reference
         fill(src);
 
-        return reference::pooling_layer<T>(src, info);
+        return reference::pooling_layer<T>(src, info, output_qinfo);
     }
 
     TensorType       _target{};
     SimpleTensor<T>  _reference{};
-    QuantizationInfo _quantization_info{};
     PoolingLayerInfo _pool_info{};
 };
 
@@ -133,7 +136,7 @@
     void setup(TensorShape shape, PoolingType pool_type, Size2D pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, DataLayout data_layout)
     {
         PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type, pool_size, pad_stride_info, exclude_padding),
-                                                                                               data_type, data_layout, QuantizationInfo());
+                                                                                               data_type, data_layout);
     }
 };
 
@@ -142,11 +145,10 @@
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, PoolingType pool_type, Size2D pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type,
-               QuantizationInfo quantization_info, DataLayout data_layout = DataLayout::NCHW)
+    void setup(TensorShape shape, PoolingType pool_type, Size2D pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, DataLayout data_layout = DataLayout::NCHW)
     {
         PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type, pool_size, pad_stride_info, exclude_padding),
-                                                                                               data_type, data_layout, quantization_info);
+                                                                                               data_type, data_layout);
     }
 };
 
@@ -157,7 +159,7 @@
     template <typename...>
     void setup(TensorShape src_shape, PoolingLayerInfo pool_info, DataType data_type)
     {
-        PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(src_shape, pool_info, data_type, DataLayout::NCHW, QuantizationInfo());
+        PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(src_shape, pool_info, data_type, DataLayout::NCHW);
     }
 };
 
@@ -168,7 +170,7 @@
     template <typename...>
     void setup(TensorShape shape, PoolingType pool_type, DataType data_type, DataLayout data_layout = DataLayout::NCHW)
     {
-        PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type), data_type, DataLayout::NCHW, QuantizationInfo());
+        PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type), data_type, DataLayout::NCHW);
     }
 };
 
diff --git a/tests/validation/fixtures/QuantizationLayerFixture.h b/tests/validation/fixtures/QuantizationLayerFixture.h
index 8590b71..84d4d7a 100644
--- a/tests/validation/fixtures/QuantizationLayerFixture.h
+++ b/tests/validation/fixtures/QuantizationLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,10 +47,10 @@
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type)
+    void setup(TensorShape shape, DataType data_type, QuantizationInfo quant_info)
     {
-        _target    = compute_target(shape, data_type);
-        _reference = compute_reference(shape, data_type);
+        _target    = compute_target(shape, data_type, quant_info);
+        _reference = compute_reference(shape, data_type, quant_info);
     }
 
 protected:
@@ -60,11 +60,11 @@
         library->fill_tensor_uniform(tensor, 0);
     }
 
-    TensorType compute_target(const TensorShape &shape, DataType data_type)
+    TensorType compute_target(const TensorShape &shape, DataType data_type, QuantizationInfo quant_info)
     {
         // Create tensors
         TensorType src = create_tensor<TensorType>(shape, data_type);
-        TensorType dst = create_tensor<TensorType>(shape, DataType::U8);
+        TensorType dst = create_tensor<TensorType>(shape, DataType::QASYMM8, 1, quant_info);
 
         // Create and configure function
         FunctionType quantization_layer;
@@ -89,7 +89,7 @@
         return dst;
     }
 
-    SimpleTensor<uint8_t> compute_reference(const TensorShape &shape, DataType data_type)
+    SimpleTensor<uint8_t> compute_reference(const TensorShape &shape, DataType data_type, QuantizationInfo quant_info)
     {
         // Create reference
         SimpleTensor<T> src{ shape, data_type };
@@ -97,12 +97,13 @@
         // Fill reference
         fill(src);
 
-        return reference::quantization_layer<T>(src);
+        return reference::quantization_layer<T>(src, quant_info);
     }
 
     TensorType            _target{};
     SimpleTensor<uint8_t> _reference{};
 };
+
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/reference/ArithmeticDivision.cpp b/tests/validation/reference/ArithmeticDivision.cpp
index 0102231..0ced439 100644
--- a/tests/validation/reference/ArithmeticDivision.cpp
+++ b/tests/validation/reference/ArithmeticDivision.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -77,7 +77,9 @@
 {
     SimpleTensor<T> dst(TensorShape::broadcast_shape(src1.shape(), src2.shape()), data_type);
 
-    Coordinates id_src1, id_src2, id_dst;
+    Coordinates id_src1{};
+    Coordinates id_src2{};
+    Coordinates id_dst{};
 
     BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(src1, src2, dst, id_src1, id_src2, id_dst);
 
diff --git a/tests/validation/reference/ArithmeticOperations.cpp b/tests/validation/reference/ArithmeticOperations.cpp
index 062be93..a6205af 100644
--- a/tests/validation/reference/ArithmeticOperations.cpp
+++ b/tests/validation/reference/ArithmeticOperations.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -88,7 +88,9 @@
 template <typename T>
 SimpleTensor<T> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, SimpleTensor<T> &dst, ConvertPolicy convert_policy)
 {
-    Coordinates id_src1, id_src2, id_dst;
+    Coordinates id_src1{};
+    Coordinates id_src2{};
+    Coordinates id_dst{};
 
     BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1, src2, dst, convert_policy, id_src1, id_src2, id_dst);
 
@@ -98,14 +100,16 @@
 template <>
 SimpleTensor<uint8_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<uint8_t> &src1, const SimpleTensor<uint8_t> &src2, SimpleTensor<uint8_t> &dst, ConvertPolicy convert_policy)
 {
+    Coordinates id_src1{};
+    Coordinates id_src2{};
+    Coordinates id_dst{};
+
     if(dst.data_type() == DataType::QASYMM8)
     {
         SimpleTensor<float> src1_tmp = convert_from_asymmetric(src1);
         SimpleTensor<float> src2_tmp = convert_from_asymmetric(src2);
         SimpleTensor<float> dst_tmp(TensorShape::broadcast_shape(src1.shape(), src2.shape()), dst.data_type());
 
-        Coordinates id_src1, id_src2, id_dst;
-
         BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1_tmp, src2_tmp, dst_tmp, convert_policy, id_src1, id_src2, id_dst);
 
         dst = convert_to_asymmetric(dst_tmp, dst.quantization_info());
@@ -114,8 +118,6 @@
     else
     {
         // DataType::U8
-        Coordinates id_src1, id_src2, id_dst;
-
         BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1, src2, dst, convert_policy, id_src1, id_src2, id_dst);
 
         return dst;
diff --git a/tests/validation/reference/CannyEdgeDetector.cpp b/tests/validation/reference/CannyEdgeDetector.cpp
index 92a11db..a952dde 100644
--- a/tests/validation/reference/CannyEdgeDetector.cpp
+++ b/tests/validation/reference/CannyEdgeDetector.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -117,7 +117,8 @@
     ValidRegion     valid_region = shape_to_valid_region(src.shape(), border_mode == BorderMode::UNDEFINED, BorderSize(gradient_size / 2 + 1));
 
     // Sobel computation: U == int16_t or int32_t
-    SimpleTensor<U> gx, gy;
+    SimpleTensor<U> gx{};
+    SimpleTensor<U> gy{};
     std::tie(gx, gy) = sobel<U>(src, gradient_size, border_mode, constant_border_value, GradientDimension::GRAD_XY);
 
     using unsigned_U = typename traits::make_unsigned_conditional_t<U>::type;
@@ -178,7 +179,8 @@
             continue;
         }
 
-        unsigned_U mag_90, mag90;
+        unsigned_U mag_90;
+        unsigned_U mag90;
         switch(grad_dir[i])
         {
             case 0: // North/South edge direction, compare against East/West pixels (left & right)
diff --git a/tests/validation/reference/Comparisons.cpp b/tests/validation/reference/Comparisons.cpp
index a83c365..6d08daf 100644
--- a/tests/validation/reference/Comparisons.cpp
+++ b/tests/validation/reference/Comparisons.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -109,7 +109,9 @@
 {
     SimpleTensor<uint8_t> dst(TensorShape::broadcast_shape(src1.shape(), src2.shape()), DataType::U8);
 
-    Coordinates id_src1, id_src2, id_dst;
+    Coordinates id_src1{};
+    Coordinates id_src2{};
+    Coordinates id_dst{};
     BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1, src2, dst, id_src1, id_src2, id_dst);
     return dst;
 }
@@ -119,18 +121,19 @@
 {
     SimpleTensor<uint8_t> dst(TensorShape::broadcast_shape(src1.shape(), src2.shape()), DataType::U8);
 
+    Coordinates id_src1{};
+    Coordinates id_src2{};
+    Coordinates id_dst{};
+
     if(src1.data_type() == DataType::QASYMM8)
     {
         SimpleTensor<float> src1_tmp = convert_from_asymmetric(src1);
         SimpleTensor<float> src2_tmp = convert_from_asymmetric(src2);
-
-        Coordinates id_src1, id_src2, id_dst;
         BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1_tmp, src2_tmp, dst, id_src1, id_src2, id_dst);
     }
     else
     {
         // DataType::U8
-        Coordinates id_src1, id_src2, id_dst;
         BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1, src2, dst, id_src1, id_src2, id_dst);
     }
     return dst;
diff --git a/tests/validation/reference/WidthConcatenateLayer.cpp b/tests/validation/reference/ConcatenateLayer.cpp
similarity index 69%
rename from tests/validation/reference/WidthConcatenateLayer.cpp
rename to tests/validation/reference/ConcatenateLayer.cpp
index 3854339..af818a5 100644
--- a/tests/validation/reference/WidthConcatenateLayer.cpp
+++ b/tests/validation/reference/ConcatenateLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,9 +21,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "WidthConcatenateLayer.h"
+#include "ConcatenateLayer.h"
 
 #include "tests/validation/Helpers.h"
+#include "tests/validation/reference/Permute.h"
 
 namespace arm_compute
 {
@@ -33,24 +34,23 @@
 {
 namespace reference
 {
+namespace
+{
 template <typename T>
 SimpleTensor<T> widthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs, SimpleTensor<T> &dst)
 {
     // Create reference
     std::vector<TensorShape> shapes;
-
+    shapes.reserve(srcs.size());
     for(const auto &src : srcs)
     {
         shapes.emplace_back(src.shape());
     }
-
     // Compute reference
     int       width_offset = 0;
     const int width_out    = dst.shape().x();
-
     // Set output tensor to 0
     std::fill_n(dst.data(), dst.num_elements(), 0);
-
     for(const auto &src : srcs)
     {
         ARM_COMPUTE_ERROR_ON(width_offset >= width_out);
@@ -89,13 +89,52 @@
         }
         width_offset += width;
     }
-
     return dst;
 }
 
 template SimpleTensor<float> widthconcatenate_layer(const std::vector<SimpleTensor<float>> &srcs, SimpleTensor<float> &dst);
 template SimpleTensor<half> widthconcatenate_layer(const std::vector<SimpleTensor<half>> &srcs, SimpleTensor<half> &dst);
 template SimpleTensor<uint8_t> widthconcatenate_layer(const std::vector<SimpleTensor<uint8_t>> &srcs, SimpleTensor<uint8_t> &dst);
+} // namespace
+
+template <typename T>
+SimpleTensor<T> concatenate_layer(std::vector<SimpleTensor<T>> &srcs, SimpleTensor<T> &dst, unsigned int axis)
+{
+    switch(axis)
+    {
+        case Window::DimX:
+        {
+            return widthconcatenate_layer(srcs, dst);
+        }
+        case Window::DimY:
+        {
+            for(auto &t : srcs)
+            {
+                t = reference::permute<T>(t, PermutationVector(1U, 0U));
+            }
+            dst = reference::permute<T>(dst, PermutationVector(1U, 0U));
+            return reference::permute<T>(widthconcatenate_layer(srcs, dst), PermutationVector(1U, 0U));
+        }
+        case Window::DimZ:
+        {
+            for(auto &t : srcs)
+            {
+                t = reference::permute<T>(t, PermutationVector(2U, 1U, 0U));
+            }
+            dst = reference::permute<T>(dst, PermutationVector(2U, 1U, 0U));
+            return reference::permute<T>(widthconcatenate_layer(srcs, dst), PermutationVector(2U, 1U, 0U));
+        }
+        default:
+        {
+            ARM_COMPUTE_ERROR("Not supported");
+            return dst;
+        }
+    }
+}
+
+template SimpleTensor<float> concatenate_layer(std::vector<SimpleTensor<float>> &srcs, SimpleTensor<float> &dst, unsigned int axis);
+template SimpleTensor<half> concatenate_layer(std::vector<SimpleTensor<half>> &srcs, SimpleTensor<half> &dst, unsigned int axis);
+template SimpleTensor<uint8_t> concatenate_layer(std::vector<SimpleTensor<uint8_t>> &srcs, SimpleTensor<uint8_t> &dst, unsigned int axis);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/WidthConcatenateLayer.h b/tests/validation/reference/ConcatenateLayer.h
similarity index 81%
rename from tests/validation/reference/WidthConcatenateLayer.h
rename to tests/validation/reference/ConcatenateLayer.h
index 0f1f428..14fd097 100644
--- a/tests/validation/reference/WidthConcatenateLayer.h
+++ b/tests/validation/reference/ConcatenateLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_H__
-#define __ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_H__
+#ifndef __ARM_COMPUTE_TEST_CONCATENATE_LAYER_H__
+#define __ARM_COMPUTE_TEST_CONCATENATE_LAYER_H__
 
 #include "tests/SimpleTensor.h"
 
@@ -37,9 +37,9 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> widthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs, SimpleTensor<T> &dst);
+SimpleTensor<T> concatenate_layer(std::vector<SimpleTensor<T>> &srcs, SimpleTensor<T> &dst, unsigned int axis);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_H__ */
+#endif /* __ARM_COMPUTE_TEST_CONCATENATE_LAYER_H__ */
diff --git a/tests/validation/reference/ConvolutionLayer.cpp b/tests/validation/reference/ConvolutionLayer.cpp
index f41a6fc..6909011 100644
--- a/tests/validation/reference/ConvolutionLayer.cpp
+++ b/tests/validation/reference/ConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -111,10 +111,15 @@
 }
 template <typename T, typename TB>
 SimpleTensor<T> convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape, const PadStrideInfo &info,
-                                  const Size2D &dilation, unsigned int num_groups)
+                                  const Size2D &dilation, unsigned int num_groups, QuantizationInfo out_quant_info)
 {
+    // if no explicit quantization has been set you the same as src
+    if(out_quant_info == QuantizationInfo())
+    {
+        out_quant_info = src.quantization_info();
+    }
     // Create reference
-    SimpleTensor<T> dst{ output_shape, src.data_type(), 1, src.quantization_info() };
+    SimpleTensor<T> dst{ output_shape, src.data_type(), 1, out_quant_info };
 
     if(src.data_layout() == DataLayout::NHWC)
     {
@@ -131,11 +136,11 @@
 }
 
 template SimpleTensor<float> convolution_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, const TensorShape &output_shape,
-                                               const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups);
+                                               const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups, QuantizationInfo out_quant_info);
 template SimpleTensor<half> convolution_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, const TensorShape &output_shape,
-                                              const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups);
+                                              const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups, QuantizationInfo out_quant_info);
 template SimpleTensor<uint8_t> convolution_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, const TensorShape &output_shape,
-                                                 const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups);
+                                                 const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups, QuantizationInfo out_quant_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/ConvolutionLayer.h b/tests/validation/reference/ConvolutionLayer.h
index ccce53a..c51a9b3 100644
--- a/tests/validation/reference/ConvolutionLayer.h
+++ b/tests/validation/reference/ConvolutionLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,7 +37,7 @@
 {
 template <typename T, typename TB>
 SimpleTensor<T> convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape, const PadStrideInfo &info,
-                                  const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1);
+                                  const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1, QuantizationInfo out_quant_info = QuantizationInfo());
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/CropResize.cpp b/tests/validation/reference/CropResize.cpp
new file mode 100644
index 0000000..8cfce97
--- /dev/null
+++ b/tests/validation/reference/CropResize.cpp
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "CropResize.h"
+#include "Utils.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+SimpleTensor<float> scale_image(const SimpleTensor<float> &in, const TensorShape &out_shape, InterpolationPolicy policy, float extrapolation_value)
+{
+    ARM_COMPUTE_ERROR_ON(in.data_layout() != DataLayout::NHWC);
+
+    SimpleTensor<float> out{ out_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC };
+    // Compute the ratio between source width/height and destination width/height
+    const auto wr = static_cast<float>(in.shape()[1]) / static_cast<float>(out_shape[1]);
+    const auto hr = static_cast<float>(in.shape()[2]) / static_cast<float>(out_shape[2]);
+
+    const auto width  = static_cast<int>(in.shape().y());
+    const auto height = static_cast<int>(in.shape().z());
+
+    Window win;
+    win.use_tensor_dimensions(out_shape);
+    execute_window_loop(win, [&](const Coordinates & out_id)
+    {
+        Coordinates in_id(out_id);
+        int         idw = in_id.y();
+        int         idh = in_id.z();
+
+        switch(policy)
+        {
+            case InterpolationPolicy::NEAREST_NEIGHBOR:
+            {
+                //Calculate the source coords without -0.5f is equivalent to round the x_scr/y_src coords
+                float x_src = (idw + 0.5f) * wr;
+                float y_src = (idh + 0.5f) * hr;
+                in_id.set(1, x_src);
+                in_id.set(2, y_src);
+
+                // If coordinates in range of tensor's width or height
+                if(is_valid_pixel_index(x_src, y_src, width, height, 0))
+                {
+                    *reinterpret_cast<float *>(out(out_id)) = tensor_elem_at(in, in_id, BorderMode::CONSTANT, extrapolation_value);
+                }
+                else
+                {
+                    *reinterpret_cast<float *>(out(out_id)) = extrapolation_value;
+                }
+                break;
+            }
+            case InterpolationPolicy::BILINEAR:
+            {
+                float x_src = idw * wr;
+                float y_src = idh * hr;
+                in_id.set(1, std::floor(x_src));
+                in_id.set(2, std::floor(y_src));
+                if(is_valid_pixel_index(x_src, y_src, width, height, 0))
+                {
+                    const int id_w = in_id[1];
+                    const int id_h = in_id[2];
+
+                    const float dx   = x_src - id_w;
+                    const float dy   = y_src - id_h;
+                    const float dx_1 = 1.0f - dx;
+                    const float dy_1 = 1.0f - dy;
+
+                    in_id.set(1, id_w);
+                    in_id.set(2, id_h);
+                    const float tl = tensor_elem_at(in, in_id, BorderMode::CONSTANT, extrapolation_value);
+                    in_id.set(1, id_w + 1);
+                    in_id.set(2, id_h);
+                    const float tr = tensor_elem_at(in, in_id, BorderMode::CONSTANT, extrapolation_value);
+                    in_id.set(1, id_w);
+                    in_id.set(2, id_h + 1);
+                    const float bl = tensor_elem_at(in, in_id, BorderMode::CONSTANT, extrapolation_value);
+                    in_id.set(1, id_w + 1);
+                    in_id.set(2, id_h + 1);
+                    const float br = tensor_elem_at(in, in_id, BorderMode::CONSTANT, extrapolation_value);
+
+                    *reinterpret_cast<float *>(out(out_id)) = tl * (dx_1 * dy_1) + tr * (dx * dy_1) + bl * (dx_1 * dy) + br * (dx * dy);
+                }
+                else
+                {
+                    *reinterpret_cast<float *>(out(out_id)) = extrapolation_value;
+                }
+                break;
+            }
+            default:
+                ARM_COMPUTE_ERROR("Unsupported interpolation mode");
+        }
+    });
+
+    return out;
+}
+
+template <typename T>
+SimpleTensor<float> crop_image(const SimpleTensor<T> &src, Coordinates start, Coordinates end, int32_t batch_index, float extrapolation_value)
+{
+    TensorShape out_shape(src.shape()[0], abs(end[0] - start[0]) + 1, abs(end[1] - start[1]) + 1);
+
+    SimpleTensor<float> out{ out_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC };
+
+    Window win;
+    win.use_tensor_dimensions(out_shape);
+    execute_window_loop(win, [&](const Coordinates & id)
+    {
+        bool        out_of_bounds = false;
+        Coordinates offset(id[0], 0, 0, batch_index);
+        for(uint32_t i = 1; i < 3; ++i)
+        {
+            offset.set(i, end[i - 1] < start[i - 1] ? start[i - 1] - id[i] : start[i - 1] + id[i]);
+            if(offset[i] < 0 || static_cast<uint32_t>(offset[i]) > src.shape()[i] - 1)
+            {
+                out_of_bounds = true;
+                break;
+            }
+        }
+        if(!out_of_bounds)
+        {
+            *reinterpret_cast<float *>(out(id)) = static_cast<float>(*reinterpret_cast<const T *>(src(offset)));
+        }
+        else
+        {
+            *reinterpret_cast<float *>(out(id)) = extrapolation_value;
+        }
+    });
+    return out;
+}
+
+} // namespace
+
+template <typename T>
+SimpleTensor<float> crop_and_resize(const SimpleTensor<T> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+                                    Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value)
+{
+    ARM_COMPUTE_ERROR_ON(src.shape().num_dimensions() > 4);
+    ARM_COMPUTE_ERROR_ON(src.data_layout() != DataLayout::NHWC);
+
+    const TensorShape   out_shape(src.shape()[0], crop_size.x, crop_size.y, boxes.shape()[1]);
+    SimpleTensor<float> out{ out_shape, DataType::F32, 1, QuantizationInfo(), DataLayout::NHWC };
+
+    const TensorShape scaled_image_shape(src.shape()[0], crop_size.x, crop_size.y);
+
+    for(uint32_t i = 0; i < boxes.shape()[1]; ++i)
+    {
+        Coordinates start = Coordinates(std::floor((*reinterpret_cast<const float *>(boxes(Coordinates(1, i)))) * (src.shape()[1] - 1) + 0.5f),
+                                        std::floor((*reinterpret_cast<const float *>(boxes(Coordinates(0, i)))) * (src.shape()[2] - 1) + 0.5f));
+        Coordinates end = Coordinates(std::floor((*reinterpret_cast<const float *>(boxes(Coordinates(3, i)))) * (src.shape()[1] - 1) + 0.5f),
+                                      std::floor((*reinterpret_cast<const float *>(boxes(Coordinates(2, i)))) * (src.shape()[2] - 1) + 0.5f));
+        SimpleTensor<float> cropped = crop_image(src, start, end, box_ind[i], extrapolation_value);
+        SimpleTensor<float> scaled  = scale_image(cropped, scaled_image_shape, method, extrapolation_value);
+        std::copy_n(reinterpret_cast<float *>(scaled.data()), scaled.num_elements(), reinterpret_cast<float *>(out(Coordinates(0, 0, 0, i))));
+    }
+    return out;
+}
+
+template SimpleTensor<float> crop_and_resize(const SimpleTensor<float> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+                                             Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+template SimpleTensor<float> crop_and_resize(const SimpleTensor<uint16_t> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+                                             Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+template SimpleTensor<float> crop_and_resize(const SimpleTensor<uint32_t> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+                                             Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+template SimpleTensor<float> crop_and_resize(const SimpleTensor<int16_t> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+                                             Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+template SimpleTensor<float> crop_and_resize(const SimpleTensor<int32_t> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+                                             Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+template SimpleTensor<float> crop_and_resize(const SimpleTensor<half> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+                                             Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/WidthConcatenateLayer.h b/tests/validation/reference/CropResize.h
similarity index 76%
copy from tests/validation/reference/WidthConcatenateLayer.h
copy to tests/validation/reference/CropResize.h
index 0f1f428..517c24b 100644
--- a/tests/validation/reference/WidthConcatenateLayer.h
+++ b/tests/validation/reference/CropResize.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,13 +21,11 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_H__
-#define __ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_H__
+#ifndef __ARM_COMPUTE_TEST_CROP_RESIZE_H__
+#define __ARM_COMPUTE_TEST_CROP_RESIZE_H__
 
 #include "tests/SimpleTensor.h"
 
-#include <vector>
-
 namespace arm_compute
 {
 namespace test
@@ -37,9 +35,10 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> widthconcatenate_layer(const std::vector<SimpleTensor<T>> &srcs, SimpleTensor<T> &dst);
+SimpleTensor<float> crop_and_resize(const SimpleTensor<T> &src, const SimpleTensor<float> &boxes, SimpleTensor<int32_t> box_ind,
+                                    Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_WIDTHCONCATENATE_LAYER_H__ */
+#endif /* __ARM_COMPUTE_TEST_CROP_RESIZE_H__ */
diff --git a/tests/validation/reference/DFT.cpp b/tests/validation/reference/DFT.cpp
new file mode 100644
index 0000000..6ad1b9e
--- /dev/null
+++ b/tests/validation/reference/DFT.cpp
@@ -0,0 +1,420 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "DFT.h"
+
+#include "PadLayer.h"
+#include "Permute.h"
+#include "Reverse.h"
+#include "SliceOperations.h"
+
+#include <cmath>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+/** Performs an one dimensional DFT on a given real sequence.
+ *
+ * @param[in]  src_ptr Pointer to the real input sequence.
+ * @param[in]  N       Size of input sequence.
+ * @param[out] dst_ptr Pointer to the complex output sequence.
+ * @param[out] K       Size of the output sequence
+ */
+template <typename T>
+void rdft_1d_step(const T *src_ptr, size_t N, T *dst_ptr, size_t K)
+{
+    for(unsigned int k = 0; k < K; ++k)
+    {
+        float Xr = 0;
+        float Xi = 0;
+        for(unsigned int n = 0; n < N; ++n)
+        {
+            const float alpha = (2 * M_PI * k * n) / N;
+            const float val_r = src_ptr[n];
+            // Assuming DFT from the R domain thus skipping imaginary calculations
+            Xr += val_r * cos(alpha);
+            Xi -= val_r * sin(alpha);
+        }
+
+        dst_ptr[k * 2]     = Xr;
+        dst_ptr[k * 2 + 1] = Xi;
+    }
+}
+
+/** Performs an one dimensional DFT on a given complex sequence.
+ *
+ * @param[in]  src_ptr Pointer to the complex input sequence.
+ * @param[out] dst_ptr Pointer to the complex output sequence.
+ * @param[in]  N       Size of the sequences
+ */
+template <typename T>
+void dft_1d_step(const T *src_ptr, T *dst_ptr, size_t N)
+{
+    for(unsigned int k = 0; k < N; ++k)
+    {
+        float Xr = 0;
+        float Xi = 0;
+        for(unsigned int n = 0; n < N; ++n)
+        {
+            const float alpha     = (2 * M_PI * k * n) / N;
+            const float val_r     = src_ptr[2 * n];
+            const float val_i     = src_ptr[2 * n + 1];
+            const float cos_alpha = cos(alpha);
+            const float sin_alpha = sin(alpha);
+
+            Xr += val_r * cos_alpha + val_i * sin_alpha;
+            Xi += val_i * cos_alpha - val_r * sin_alpha;
+        }
+
+        dst_ptr[k * 2]     = Xr;
+        dst_ptr[k * 2 + 1] = Xi;
+    }
+}
+
+/** Performs an one dimensional inverse DFT on a given real sequence.
+ *
+ * @param[in]  src_ptr Pointer to the real input sequence.
+ * @param[in]  K       Size of input sequence.
+ * @param[out] dst_ptr Pointer to the complex output sequence.
+ * @param[out] N       Size of the output sequence
+ */
+template <typename T>
+void irdft_1d_step(const T *src_ptr, size_t K, T *dst_ptr, size_t N)
+{
+    const bool         is_odd     = N % 2;
+    const unsigned int Nleft      = N - K;
+    const int          tail_start = is_odd ? K - 1 : K - 2;
+
+    for(unsigned int n = 0; n < N; ++n)
+    {
+        float xr = 0;
+        for(unsigned int k = 0; k < K; ++k)
+        {
+            const float alpha = (2 * M_PI * k * n) / N;
+            xr += src_ptr[2 * k] * cos(alpha) - src_ptr[2 * k + 1] * sin(alpha);
+        }
+
+        unsigned int j = tail_start;
+        for(unsigned int k = 0; k < Nleft; ++k)
+        {
+            const float alpha = (2 * M_PI * (k + K) * n) / N;
+            xr += src_ptr[2 * j] * cos(alpha) + src_ptr[2 * j + 1] * sin(alpha);
+            --j;
+        }
+
+        dst_ptr[n] = xr;
+    }
+}
+
+/** Performs an one dimensional inverse DFT on a given complex sequence.
+ *
+ * @param[in]  src_ptr Pointer to the complex input sequence.
+ * @param[out] dst_ptr Pointer to the complex output sequence.
+ * @param[in]  N       Size of the sequences
+ */
+template <typename T>
+void idft_1d_step(const T *src_ptr, T *dst_ptr, size_t N)
+{
+    for(unsigned int n = 0; n < N; ++n)
+    {
+        float xr = 0;
+        float xi = 0;
+        for(unsigned int k = 0; k < N; ++k)
+        {
+            const float alpha     = (2 * M_PI * k * n) / N;
+            const float cos_alpha = cos(alpha);
+            const float sin_alpha = sin(alpha);
+            const float val_r     = src_ptr[2 * k];
+            const float val_i     = src_ptr[2 * k + 1];
+
+            xr += val_r * cos_alpha - val_i * sin_alpha;
+            xi += val_i * cos_alpha + val_r * sin_alpha;
+        }
+
+        dst_ptr[2 * n]     = xr;
+        dst_ptr[2 * n + 1] = xi;
+    }
+}
+
+template <typename T>
+SimpleTensor<T> rdft_1d_core(const SimpleTensor<T> &src, FFTDirection direction, bool is_odd)
+{
+    // Performs only rdft
+    ARM_COMPUTE_ERROR_ON(direction == FFTDirection::Forward && src.num_channels() != 1);
+    ARM_COMPUTE_ERROR_ON(direction == FFTDirection::Inverse && src.num_channels() != 2);
+
+    const unsigned int inverse_tail = is_odd ? 1 : 0;
+    const unsigned int N            = src.shape()[0];
+    const unsigned int K            = direction == FFTDirection::Forward ? N / 2 + 1 : (N - 1) * 2 + inverse_tail;
+    const unsigned int num_channels = direction == FFTDirection::Forward ? 2 : 1;
+
+    TensorShape dst_shape = src.shape();
+    dst_shape.set(0, K);
+
+    SimpleTensor<T> dst(dst_shape, src.data_type(), num_channels);
+
+    const unsigned int upper_dims = src.shape().total_size_upper(1);
+    for(unsigned int du = 0; du < upper_dims; ++du)
+    {
+        const T *src_row_ptr = src.data() + du * N * src.num_channels();
+        T       *dst_row_ptr = dst.data() + du * K * dst.num_channels();
+        direction == FFTDirection::Forward ? rdft_1d_step(src_row_ptr, N, dst_row_ptr, K) : irdft_1d_step(src_row_ptr, N, dst_row_ptr, K);
+    }
+
+    return dst;
+}
+
+template <typename T>
+SimpleTensor<T> dft_1d_core(const SimpleTensor<T> &src, FFTDirection direction)
+{
+    ARM_COMPUTE_ERROR_ON(src.num_channels() != 2);
+
+    const unsigned int N = src.shape()[0];
+
+    SimpleTensor<T> dst(src.shape(), src.data_type(), src.num_channels());
+
+    const unsigned int upper_dims = src.shape().total_size_upper(1);
+    for(unsigned int du = 0; du < upper_dims; ++du)
+    {
+        const T *src_row_ptr = src.data() + du * N * src.num_channels();
+        T       *dst_row_ptr = dst.data() + du * N * dst.num_channels();
+        direction == FFTDirection::Forward ? dft_1d_step(src_row_ptr, dst_row_ptr, N) : idft_1d_step(src_row_ptr, dst_row_ptr, N);
+    }
+
+    return dst;
+}
+
+/** Scale a tensor by a given scaling factor.
+ *
+ * @param[in,out] tensor         Tensor to scale.
+ * @param[in]     scaling_factor Scaling to scale the tensor data with.
+ */
+template <typename T>
+void scale(SimpleTensor<T> &tensor, T scaling_factor)
+{
+    const int total_elements = tensor.num_elements() * tensor.num_channels();
+    T        *data_ptr       = tensor.data();
+    for(int i = 0; i < total_elements; ++i)
+    {
+        data_ptr[i] /= scaling_factor;
+    }
+}
+
+/** Performs a complex element-wise multiplication with reduction across the channels axis.
+ *
+ * @param[in] input   Input tensor.
+ * @param[in] weights Weights tensor.
+ *
+ * @return Output tensor.
+ */
+template <typename T>
+SimpleTensor<T> complex_mul_and_reduce(const SimpleTensor<T> &input, const SimpleTensor<T> &weights)
+{
+    const int W  = input.shape().x();
+    const int H  = input.shape().y();
+    const int Ci = input.shape().z();
+    const int Co = weights.shape()[3];
+    const int N  = input.shape().total_size() / (W * H * Ci);
+
+    TensorShape output_shape = input.shape();
+    output_shape.set(2, Co);
+    SimpleTensor<T> dst(output_shape, input.data_type(), input.num_channels());
+
+    // MemSet dst memory to zero
+    std::memset(dst.data(), 0, dst.size());
+
+    for(int b = 0; b < N; ++b)
+    {
+        for(int co = 0; co < Co; ++co)
+        {
+            for(int ci = 0; ci < Ci; ++ci)
+            {
+                for(int h = 0; h < H; ++h)
+                {
+                    for(int w = 0; w < W; ++w)
+                    {
+                        size_t            i_index  = w + h * W + ci * H * W + b * H * W * Ci;
+                        size_t            w_index  = w + h * W + ci * H * W + co * H * W * Ci;
+                        size_t            o_index  = w + h * W + co * H * W + b * H * W * Co;
+                        const Coordinates i_coords = index2coords(input.shape(), i_index);
+                        const Coordinates w_coords = index2coords(weights.shape(), w_index);
+                        const Coordinates o_coords = index2coords(dst.shape(), o_index);
+
+                        auto i_ptr = static_cast<const T *>(input(i_coords));
+                        auto w_ptr = static_cast<const T *>(weights(w_coords));
+                        auto o_ptr = static_cast<T *>(dst(o_coords));
+
+                        const T Rin = i_ptr[0];
+                        const T Iin = i_ptr[1];
+                        const T Rw  = w_ptr[0];
+                        const T Iw  = w_ptr[1];
+
+                        o_ptr[0] += Rin * Rw - Iin * Iw;
+                        o_ptr[1] += Rin * Iw + Rw * Iin;
+                    }
+                }
+            }
+        }
+    }
+    return dst;
+}
+} // namespace
+
+template <typename T>
+SimpleTensor<T> rdft_1d(const SimpleTensor<T> &src)
+{
+    return rdft_1d_core(src, FFTDirection::Forward, false);
+}
+
+template <typename T>
+SimpleTensor<T> ridft_1d(const SimpleTensor<T> &src, bool is_odd)
+{
+    auto dst = rdft_1d_core(src, FFTDirection::Inverse, is_odd);
+
+    const T scaling_factor = dst.shape()[0];
+    scale(dst, scaling_factor);
+
+    return dst;
+}
+
+template <typename T>
+SimpleTensor<T> dft_1d(const SimpleTensor<T> &src, FFTDirection direction)
+{
+    auto dst = dft_1d_core(src, direction);
+    if(direction == FFTDirection::Inverse)
+    {
+        const T scaling_factor = dst.shape()[0];
+        scale(dst, scaling_factor);
+    }
+    return dst;
+}
+
+template <typename T>
+SimpleTensor<T> rdft_2d(const SimpleTensor<T> &src)
+{
+    ARM_COMPUTE_ERROR_ON(src.num_channels() != 1);
+    constexpr FFTDirection direction = FFTDirection::Forward;
+
+    auto first_pass  = rdft_1d_core(src, direction, false);
+    auto transposed  = permute(first_pass, PermutationVector(1U, 0U));
+    auto second_pass = dft_1d_core(transposed, direction);
+    return permute(second_pass, PermutationVector(1U, 0U));
+}
+
+template <typename T>
+SimpleTensor<T> ridft_2d(const SimpleTensor<T> &src, bool is_odd)
+{
+    ARM_COMPUTE_ERROR_ON(src.num_channels() != 2);
+    constexpr FFTDirection direction = FFTDirection::Inverse;
+
+    auto transposed   = permute(src, PermutationVector(1U, 0U));
+    auto first_pass   = dft_1d_core(transposed, direction);
+    auto transposed_2 = permute(first_pass, PermutationVector(1U, 0U));
+    auto dst          = rdft_1d_core(transposed_2, direction, is_odd);
+
+    const T scaling_factor = dst.shape()[0] * dst.shape()[1];
+    scale(dst, scaling_factor);
+    return dst;
+}
+
+template <typename T>
+SimpleTensor<T> dft_2d(const SimpleTensor<T> &src, FFTDirection direction)
+{
+    ARM_COMPUTE_ERROR_ON(src.num_channels() != 2);
+
+    if(direction == FFTDirection::Forward)
+    {
+        auto first_pass  = dft_1d_core(src, direction);
+        auto transposed  = permute(first_pass, PermutationVector(1U, 0U));
+        auto second_pass = dft_1d_core(transposed, direction);
+        return permute(second_pass, PermutationVector(1U, 0U));
+    }
+    else
+    {
+        auto transposed   = permute(src, PermutationVector(1U, 0U));
+        auto first_pass   = dft_1d_core(transposed, direction);
+        auto transposed_2 = permute(first_pass, PermutationVector(1U, 0U));
+        auto dst          = dft_1d_core(transposed_2, direction);
+
+        const T scaling_factor = dst.shape()[0] * dst.shape()[1];
+        scale(dst, scaling_factor);
+
+        return dst;
+    }
+}
+
+template <typename T>
+SimpleTensor<T> conv2d_dft(const SimpleTensor<T> &src, const SimpleTensor<T> &w, const PadStrideInfo &conv_info)
+{
+    // Pad input to full padding
+    const PaddingList padding_in = { { 0, w.shape()[0] - 1 }, { 0, w.shape()[1] - 1 } };
+    auto              padded_src = pad_layer(src, padding_in);
+
+    // Flip weights
+    std::vector<uint32_t>  axis_v = { 0, 1 };
+    SimpleTensor<uint32_t> axis{ TensorShape(2U), DataType::U32 };
+    std::copy(axis_v.begin(), axis_v.begin() + axis.shape().x(), axis.data());
+    auto flipped_w = reverse(w, axis);
+
+    // Pad weights to have the same size as input
+    const PaddingList paddings_w = { { 0, src.shape()[0] - 1 }, { 0, src.shape()[1] - 1 } };
+    auto              padded_w   = pad_layer(flipped_w, paddings_w);
+
+    // Transform input and weights to frequency domain
+    auto Fsrc = rdft_2d(padded_src);
+    auto Fw   = rdft_2d(padded_w);
+
+    // Perform dot product
+    auto Fdst = complex_mul_and_reduce(Fsrc, Fw);
+
+    // Transform output back to frequency domain
+    auto conv_res = ridft_2d(Fdst);
+
+    // Slice output
+    const int start_left = w.shape().x() - conv_info.pad_left() - 1;
+    const int start_top  = w.shape().y() - conv_info.pad_top() - 1;
+    const int end_right  = conv_res.shape().x() - (w.shape().x() - conv_info.pad_right() - 1);
+    const int end_botton = conv_res.shape().y() - (w.shape().y() - conv_info.pad_bottom() - 1);
+    return slice(conv_res, Coordinates(start_left, start_top), Coordinates(end_right, end_botton));
+}
+
+template SimpleTensor<float> rdft_1d(const SimpleTensor<float> &src);
+template SimpleTensor<float> ridft_1d(const SimpleTensor<float> &src, bool is_odd);
+template SimpleTensor<float> dft_1d(const SimpleTensor<float> &src, FFTDirection direction);
+
+template SimpleTensor<float> rdft_2d(const SimpleTensor<float> &src);
+template SimpleTensor<float> ridft_2d(const SimpleTensor<float> &src, bool is_odd);
+template SimpleTensor<float> dft_2d(const SimpleTensor<float> &src, FFTDirection direction);
+
+template SimpleTensor<float> conv2d_dft(const SimpleTensor<float> &src, const SimpleTensor<float> &w, const PadStrideInfo &conv_info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/DFT.h b/tests/validation/reference/DFT.h
new file mode 100644
index 0000000..a3a10ab
--- /dev/null
+++ b/tests/validation/reference/DFT.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_FFT_H__
+#define __ARM_COMPUTE_TEST_FFT_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+enum class FFTDirection
+{
+    Forward,
+    Inverse
+};
+
+/** Performs an one dimensional DFT on a real input.
+ *
+ * @param[in] src Source tensor.
+ *
+ * @return Complex output of length n/2 + 1 due to symmetry.
+ */
+template <typename T>
+SimpleTensor<T> rdft_1d(const SimpleTensor<T> &src);
+
+/** Performs an one dimensional inverse DFT on a real input.
+ *
+ * @param[in] src    Source tensor.
+ * @param[in] is_odd (Optional) Specifies if the output has odd dimensions.
+ *                   Is used by the inverse variant to reconstruct odd sequences.
+ *
+ * @return Complex output of length n/2 + 1 due to symmetry.
+ */
+template <typename T>
+SimpleTensor<T> ridft_1d(const SimpleTensor<T> &src, bool is_odd = false);
+
+/**  Performs an one dimensional DFT on a complex input.
+ *
+ * @param[in] src       Source tensor.
+ * @param[in] direction Direction of the DFT.
+ *
+ * @return Complex output of same length as input.
+ */
+template <typename T>
+SimpleTensor<T> dft_1d(const SimpleTensor<T> &src, FFTDirection direction);
+
+/** Performs a two dimensional DFT on a real input.
+ *
+ * @param[in] src Source tensor.
+ *
+ * @return Complex output of length n/2 + 1 across width due to symmetry and height of same size as the input.
+ */
+template <typename T>
+SimpleTensor<T> rdft_2d(const SimpleTensor<T> &src);
+
+/** Performs a two dimensional inverse DFT on a real input.
+ *
+ * @param[in] src    Source tensor.
+ * @param[in] is_odd (Optional) Specifies if the output has odd dimensions across width.
+ *                   Is used by the inverse variant to reconstruct odd sequences.
+ *
+ * @return Complex output of length n/2 + 1 across width due to symmetry and height of same size as the input.
+ */
+template <typename T>
+SimpleTensor<T> ridft_2d(const SimpleTensor<T> &src, bool is_odd = false);
+
+/**  Performs a two dimensional DFT on a complex input.
+ *
+ * @param[in] src       Source tensor.
+ * @param[in] direction Direction of the DFT.
+ *
+ * @return Complex output of same length as input.
+ */
+template <typename T>
+SimpleTensor<T> dft_2d(const SimpleTensor<T> &src, FFTDirection direction);
+
+/** Performs and DFT based convolution on a real input.
+ *
+ * @param[in] src       Source tensor.
+ * @param[in] w         Weights tensor.
+ * @param[in] conv_info Convolution related metadata.
+ *
+ * @return The output tensor.
+ */
+template <typename T>
+SimpleTensor<T> conv2d_dft(const SimpleTensor<T> &src, const SimpleTensor<T> &w, const PadStrideInfo &conv_info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_FFT_H__ */
diff --git a/tests/validation/reference/DepthConcatenateLayer.cpp b/tests/validation/reference/DepthConcatenateLayer.cpp
index 139675d..22271a0 100644
--- a/tests/validation/reference/DepthConcatenateLayer.cpp
+++ b/tests/validation/reference/DepthConcatenateLayer.cpp
@@ -38,7 +38,7 @@
 {
     // Create reference
     std::vector<TensorShape> shapes;
-
+    shapes.reserve(srcs.size());
     for(const auto &src : srcs)
     {
         shapes.emplace_back(src.shape());
@@ -66,7 +66,7 @@
             {
                 auto       ptr_slice = static_cast<T *>(dst(Coordinates(0, 0, slice, b)));
                 const auto num_elems_in_slice((dst.num_elements() / depth_out) * src.shape().z());
-                std::transform(ptr_slice, ptr_slice + num_elems_in_slice, ptr_slice, [src, dst](T t)
+                std::transform(ptr_slice, ptr_slice + num_elems_in_slice, ptr_slice, [src, dst](T)
                 {
                     return dst.quantization_info().quantize(src.quantization_info().dequantize(0), RoundingPolicy::TO_NEAREST_UP);
                 });
diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.cpp b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
index 39429e2..90ecffb 100644
--- a/tests/validation/reference/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,8 +50,10 @@
  */
 template <typename T, typename TB>
 SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info,
-                                      unsigned int depth_multiplier)
+                                      unsigned int depth_multiplier, const Size2D &dilation, QuantizationInfo out_quant_info)
 {
+    ARM_COMPUTE_UNUSED(out_quant_info);
+
     SimpleTensor<T> dst{ dst_shape, src.data_type(), 1 };
 
     // Compute reference
@@ -63,18 +65,24 @@
     const int input_depth   = src.shape().z();
     const int num_batches   = src.shape().total_size() / (input_width * input_height * input_depth);
 
-    const int filter_half_width  = filter_width / 2;
-    const int filter_half_height = filter_height / 2;
-
     const int pad_left   = conv_info.pad_left();
     const int pad_top    = conv_info.pad_top();
     const int pad_right  = conv_info.pad_right();
     const int pad_bottom = conv_info.pad_bottom();
 
-    const int minimum_x = -pad_left + filter_half_width;
-    const int minimum_y = -pad_top + filter_half_height;
-    const int maximum_x = input_width + pad_left - filter_half_width + pad_right - filter_half_width;
-    const int maximum_y = input_height + pad_top - filter_half_height + pad_bottom - filter_half_height;
+    const float patch_width  = (filter_width + (dilation.x() - 1) * (filter_width - 1));
+    const float patch_height = (filter_height + (dilation.y() - 1) * (filter_height - 1));
+
+    const int patch_half_width_floor  = patch_width / 2;
+    const int patch_half_height_floor = patch_height / 2;
+
+    const auto patch_half_width_ceil  = static_cast<int>(std::ceil(patch_width / 2));
+    const auto patch_half_height_ceil = static_cast<int>(std::ceil(patch_height / 2));
+
+    const int minimum_x = -pad_left + patch_half_width_floor;
+    const int minimum_y = -pad_top + patch_half_height_floor;
+    const int maximum_x = input_width + pad_left + pad_right - static_cast<int>(patch_width);
+    const int maximum_y = input_height + pad_top + pad_bottom - static_cast<int>(patch_height);
 
     const T border_value(0);
 
@@ -87,21 +95,20 @@
             {
                 const int out_z = z * depth_multiplier + m;
 
-                for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second)
+                for(int y = minimum_y; y <= minimum_y + maximum_y; y += conv_info.stride().second)
                 {
-                    for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first)
+                    for(int x = minimum_x; x <= minimum_x + maximum_x; x += conv_info.stride().first)
                     {
                         Coordinates coords(static_cast<int>(x), static_cast<int>(y), static_cast<int>(z), static_cast<int>(r));
                         size_t      filter_offset = filter_plane * out_z;
 
                         T val(0);
-                        for(int j = y - filter_half_height; j <= static_cast<int>(y + filter_half_height); ++j)
+                        for(int j = y - patch_half_height_floor; j < y + patch_half_height_ceil; j += dilation.y())
                         {
-                            for(int i = x - filter_half_width; i <= static_cast<int>(x + filter_half_width); ++i)
+                            for(int i = x - patch_half_width_floor; i < x + patch_half_width_ceil; i += dilation.x())
                             {
                                 coords.set(0, i);
                                 coords.set(1, j);
-
                                 val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, border_value);
                                 ++filter_offset;
                             }
@@ -119,9 +126,14 @@
 
 template <>
 SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
-                                            const PadStrideInfo &conv_info, unsigned int depth_multiplier)
+                                            const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, QuantizationInfo out_quant_info)
 {
-    SimpleTensor<uint8_t> dst{ dst_shape, src.data_type(), 1, src.quantization_info() };
+    // if no explicit quantization has been set you the same as src
+    if(out_quant_info == QuantizationInfo(0.0f, 0))
+    {
+        out_quant_info = src.quantization_info();
+    }
+    SimpleTensor<uint8_t> dst{ dst_shape, src.data_type(), 1, out_quant_info };
 
     // Create reference
     const int   input_offset   = -src.quantization_info().offset;
@@ -145,18 +157,24 @@
     const int input_depth   = src.shape().z();
     const int num_batches   = src.shape().total_size() / (input_width * input_height * input_depth);
 
-    const int filter_half_width  = filter_width / 2;
-    const int filter_half_height = filter_height / 2;
-
     const int pad_left   = conv_info.pad_left();
     const int pad_top    = conv_info.pad_top();
     const int pad_right  = conv_info.pad_right();
     const int pad_bottom = conv_info.pad_bottom();
 
-    const int minimum_x = -pad_left + filter_half_width;
-    const int minimum_y = -pad_top + filter_half_height;
-    const int maximum_x = input_width + pad_left - filter_half_width + pad_right - filter_half_width;
-    const int maximum_y = input_height + pad_top - filter_half_height + pad_bottom - filter_half_height;
+    const float patch_width  = (filter_width + (dilation.x() - 1) * (filter_width - 1));
+    const float patch_height = (filter_height + (dilation.y() - 1) * (filter_height - 1));
+
+    const int patch_half_width_floor  = patch_width / 2;
+    const int patch_half_height_floor = patch_height / 2;
+
+    const auto patch_half_width_ceil  = static_cast<int>(std::ceil(patch_width / 2));
+    const auto patch_half_height_ceil = static_cast<int>(std::ceil(patch_height / 2));
+
+    const int minimum_x = -pad_left + patch_half_width_floor;
+    const int minimum_y = -pad_top + patch_half_height_floor;
+    const int maximum_x = input_width + pad_left + pad_right - static_cast<int>(patch_width);
+    const int maximum_y = input_height + pad_top + pad_bottom - static_cast<int>(patch_height);
 
     int out_pos = 0;
     for(int r = 0; r < num_batches; ++r)
@@ -168,17 +186,17 @@
                 const int     out_z    = z * depth_multiplier + m;
                 const int32_t bias_val = *static_cast<const int32_t *>(biases(Coordinates(out_z)));
 
-                for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second)
+                for(int y = minimum_y; y <= minimum_y + maximum_y; y += conv_info.stride().second)
                 {
-                    for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first)
+                    for(int x = minimum_x; x <= minimum_x + maximum_x; x += conv_info.stride().first)
                     {
                         Coordinates coords(x, y, z, r);
                         int         filter_offset = filter_plane * out_z;
 
                         int32_t val = 0;
-                        for(int j = y - filter_half_height; j <= (y + filter_half_height); ++j)
+                        for(int j = y - patch_half_height_floor; j < y + patch_half_height_ceil; j += dilation.y())
                         {
-                            for(int i = x - filter_half_width; i <= (x + filter_half_width); ++i)
+                            for(int i = x - patch_half_width_floor; i < x + patch_half_width_ceil; i += dilation.x())
                             {
                                 coords.set(0, i);
                                 coords.set(1, j);
@@ -206,10 +224,10 @@
 }
 
 template SimpleTensor<float> depthwise_convolution(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &biases, const TensorShape &dst_shape,
-                                                   const PadStrideInfo &conv_info, unsigned int depth_multiplier);
+                                                   const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, QuantizationInfo out_quant_info);
 
 template SimpleTensor<half> depthwise_convolution(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &biases, const TensorShape &dst_shape,
-                                                  const PadStrideInfo &conv_info, unsigned int depth_multiplier);
+                                                  const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, QuantizationInfo out_quant_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.h b/tests/validation/reference/DepthwiseConvolutionLayer.h
index bab3387..ac70de0 100644
--- a/tests/validation/reference/DepthwiseConvolutionLayer.h
+++ b/tests/validation/reference/DepthwiseConvolutionLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,7 +37,7 @@
 {
 template <typename T, typename TB>
 SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info,
-                                      unsigned int depth_multiplier);
+                                      unsigned int depth_multiplier, const Size2D &dilation = Size2D(1U, 1U), QuantizationInfo out_quant_info = QuantizationInfo(0.0f, 0));
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/DequantizationLayer.cpp b/tests/validation/reference/DequantizationLayer.cpp
index 33096a1..df50c14 100644
--- a/tests/validation/reference/DequantizationLayer.cpp
+++ b/tests/validation/reference/DequantizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -31,36 +31,24 @@
 {
 namespace reference
 {
-template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type>
-SimpleTensor<float> dequantization_layer(const SimpleTensor<T> &src, const SimpleTensor<float> &min_max)
+template <typename T>
+SimpleTensor<T> dequantization_layer(const SimpleTensor<uint8_t> &src)
 {
-    // Create reference
-    SimpleTensor<float> dst{ src.shape(), DataType::F32 };
+    const DataType          dst_data_type     = std::is_same<T, float>::value ? DataType::F32 : DataType::F16;
+    const QuantizationInfo &quantization_info = src.quantization_info();
 
-    // Compute reference
-    const int width       = src.shape().x();
-    const int height      = src.shape().y();
-    const int depth       = src.shape().z();
-    const int stride_w    = width * height * depth;
-    const int num_batches = min_max.shape().total_size_upper(1);
+    SimpleTensor<T> dst{ src.shape(), dst_data_type };
 
-    for(int k = 0; k < num_batches; ++k)
+    for(int i = 0; i < src.num_elements(); ++i)
     {
-        const float min     = min_max[k * 2 + 0];
-        const float max     = min_max[k * 2 + 1];
-        const float range   = max - min;
-        const float scaling = range / 255.0f;
-
-        for(int i = 0; i < stride_w; ++i)
-        {
-            dst[i + k * stride_w] = (static_cast<float>(src[i + k * stride_w]) * scaling) + min;
-        }
+        dst[i] = static_cast<T>(quantization_info.dequantize(src[i]));
     }
 
     return dst;
 }
 
-template SimpleTensor<float> dequantization_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<float> &min_max);
+template SimpleTensor<half> dequantization_layer(const SimpleTensor<uint8_t> &src);
+template SimpleTensor<float> dequantization_layer(const SimpleTensor<uint8_t> &src);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/DequantizationLayer.h b/tests/validation/reference/DequantizationLayer.h
index 1a8adcf..1d0e54b 100644
--- a/tests/validation/reference/DequantizationLayer.h
+++ b/tests/validation/reference/DequantizationLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -35,8 +35,8 @@
 {
 namespace reference
 {
-template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
-SimpleTensor<float> dequantization_layer(const SimpleTensor<T> &src, const SimpleTensor<float> &min_max);
+template <typename T>
+SimpleTensor<T> dequantization_layer(const SimpleTensor<uint8_t> &src);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/ElementwiseOperations.cpp b/tests/validation/reference/ElementwiseOperations.cpp
index 6d533ed..2ffb0fa 100644
--- a/tests/validation/reference/ElementwiseOperations.cpp
+++ b/tests/validation/reference/ElementwiseOperations.cpp
@@ -124,7 +124,9 @@
 template <typename T>
 SimpleTensor<T> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, SimpleTensor<T> &dst, ConvertPolicy convert_policy)
 {
-    Coordinates id_src1, id_src2, id_dst;
+    Coordinates id_src1{};
+    Coordinates id_src2{};
+    Coordinates id_dst{};
 
     BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1, src2, dst, convert_policy, id_src1, id_src2, id_dst);
 
@@ -140,7 +142,9 @@
         SimpleTensor<float> src2_tmp = convert_from_asymmetric(src2);
         SimpleTensor<float> dst_tmp(TensorShape::broadcast_shape(src1.shape(), src2.shape()), dst.data_type());
 
-        Coordinates id_src1, id_src2, id_dst;
+        Coordinates id_src1{};
+        Coordinates id_src2{};
+        Coordinates id_dst{};
 
         BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1_tmp, src2_tmp, dst_tmp, convert_policy, id_src1, id_src2, id_dst);
 
@@ -150,7 +154,9 @@
     else
     {
         // DataType::U8
-        Coordinates id_src1, id_src2, id_dst;
+        Coordinates id_src1{};
+        Coordinates id_src2{};
+        Coordinates id_dst{};
 
         BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(op, src1, src2, dst, convert_policy, id_src1, id_src2, id_dst);
 
diff --git a/tests/validation/reference/FullyConnectedLayer.cpp b/tests/validation/reference/FullyConnectedLayer.cpp
index d65d0ca..07ddf6d 100644
--- a/tests/validation/reference/FullyConnectedLayer.cpp
+++ b/tests/validation/reference/FullyConnectedLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -105,10 +105,16 @@
 } // namespace
 
 template <typename T, typename TB>
-SimpleTensor<T> fully_connected_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &dst_shape)
+SimpleTensor<T> fully_connected_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &dst_shape, QuantizationInfo out_quant_info)
 {
+    // if no explicit quantization has been set you the same as src
+    if(out_quant_info == QuantizationInfo())
+    {
+        out_quant_info = src.quantization_info();
+    }
+
     // Create reference
-    SimpleTensor<T> dst{ TensorShape{ dst_shape }, src.data_type(), 1, src.quantization_info() };
+    SimpleTensor<T> dst{ TensorShape{ dst_shape }, src.data_type(), 1, out_quant_info };
 
     // Sanity checks
     const int          num_batch_dimensions = std::max(0, static_cast<int>(dst_shape.num_dimensions()) - 1);
@@ -145,9 +151,12 @@
     return dst;
 }
 
-template SimpleTensor<float> fully_connected_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, const TensorShape &dst_shape);
-template SimpleTensor<half> fully_connected_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, const TensorShape &dst_shape);
-template SimpleTensor<uint8_t> fully_connected_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, const TensorShape &dst_shape);
+template SimpleTensor<float> fully_connected_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, const TensorShape &dst_shape,
+                                                   QuantizationInfo out_quant_info);
+template SimpleTensor<half> fully_connected_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, const TensorShape &dst_shape,
+                                                  QuantizationInfo out_quant_info);
+template SimpleTensor<uint8_t> fully_connected_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, const TensorShape &dst_shape,
+                                                     QuantizationInfo out_quant_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/FullyConnectedLayer.h b/tests/validation/reference/FullyConnectedLayer.h
index 1dfb496..f474a1c 100644
--- a/tests/validation/reference/FullyConnectedLayer.h
+++ b/tests/validation/reference/FullyConnectedLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,7 +36,8 @@
 namespace reference
 {
 template <typename T, typename TB>
-SimpleTensor<T> fully_connected_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &dst_shape);
+SimpleTensor<T> fully_connected_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &dst_shape,
+                                      QuantizationInfo out_quant_info = QuantizationInfo());
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/HOGDetector.cpp b/tests/validation/reference/HOGDetector.cpp
index 5a5ae37..8ca1b0c 100644
--- a/tests/validation/reference/HOGDetector.cpp
+++ b/tests/validation/reference/HOGDetector.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -39,8 +39,8 @@
     const size_t num_block_strides_width  = hog_info.detection_window_size().width / hog_info.block_stride().width;
     const size_t num_block_strides_height = hog_info.detection_window_size().height / hog_info.block_stride().height;
 
-    return Size2D(floor_to_multiple(shape.x() - num_block_strides_width, window_step.width) + window_step.width,
-                  floor_to_multiple(shape.y() - num_block_strides_height, window_step.height) + window_step.height);
+    return Size2D{ floor_to_multiple(shape.x() - num_block_strides_width, window_step.width) + window_step.width,
+                   floor_to_multiple(shape.y() - num_block_strides_height, window_step.height) + window_step.height };
 }
 } // namespace
 
diff --git a/tests/validation/reference/PadLayer.cpp b/tests/validation/reference/PadLayer.cpp
index b9a93dd..d072bc5 100644
--- a/tests/validation/reference/PadLayer.cpp
+++ b/tests/validation/reference/PadLayer.cpp
@@ -36,27 +36,27 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> pad_layer(const SimpleTensor<T> &src, const PaddingList &paddings)
+SimpleTensor<T> pad_layer(const SimpleTensor<T> &src, const PaddingList &paddings, const PixelValue const_value, const PaddingMode mode)
 {
-    DataType dst_data_type = src.data_type();
+    const DataType dst_data_type = src.data_type();
 
-    TensorShape orig_shape = src.shape();
+    const TensorShape orig_shape = src.shape();
 
     std::vector<PaddingInfo> paddings_extended = paddings;
 
-    for(size_t i = paddings.size(); i < TensorShape::num_max_dimensions; i++)
+    for(size_t i = paddings.size(); i < TensorShape::num_max_dimensions; ++i)
     {
         paddings_extended.emplace_back(PaddingInfo{ 0, 0 });
     }
 
-    TensorShape padded_shape = misc::shape_calculator::compute_padded_shape(orig_shape, paddings);
+    const TensorShape padded_shape = misc::shape_calculator::compute_padded_shape(orig_shape, paddings);
 
     SimpleTensor<T> dst(padded_shape, dst_data_type);
 
     // Reference algorithm: loop over the different dimension of the input.
-    for(int idx = 0; idx < dst.num_elements(); idx++)
+    for(int idx = 0; idx < dst.num_elements(); ++idx)
     {
-        Coordinates coord = index2coord(padded_shape, idx);
+        const Coordinates coord = index2coord(padded_shape, idx);
 
         const size_t i = coord.x();
         const size_t j = coord.y();
@@ -65,23 +65,89 @@
         const size_t m = coord[4];
         const size_t n = coord[5];
 
-        std::array<size_t, TensorShape::num_max_dimensions> dims   = { { 0, 1, 2, 3, 4, 5 } };
-        std::array<size_t, TensorShape::num_max_dimensions> coords = { { i, j, k, l, m, n } };
+        const std::array<size_t, TensorShape::num_max_dimensions> dims   = { { 0, 1, 2, 3, 4, 5 } };
+        const std::array<size_t, TensorShape::num_max_dimensions> coords = { { i, j, k, l, m, n } };
         auto is_padding_area = [&](size_t i)
         {
             return (coords[i] < paddings_extended[i].first || coords[i] > orig_shape[i] + paddings_extended[i].first - 1);
         };
 
-        // If the tuple [i,j,k,l,m] is in the padding area, then seimply set the value
+        auto orig_coord_reflect = [&](size_t i)
+        {
+            if(is_padding_area(i))
+            {
+                if(coords[i] < paddings_extended[i].first)
+                {
+                    return paddings_extended[i].first - coords[i];
+                }
+                else
+                {
+                    return 2 * orig_shape[i] + paddings_extended[i].first - 2 - coords[i];
+                }
+            }
+            return coords[i] - paddings_extended[i].first;
+        };
+
+        auto orig_coord_symm = [&](size_t i)
+        {
+            if(is_padding_area(i))
+            {
+                if(coords[i] < paddings_extended[i].first)
+                {
+                    return paddings_extended[i].first - coords[i] - 1;
+                }
+                else
+                {
+                    return 2 * orig_shape[i] + paddings_extended[i].first - 1 - coords[i];
+                }
+            }
+            return coords[i] - paddings_extended[i].first;
+        };
+
+        // If the tuple [i,j,k,l,m] is in the padding area, then simply set the value
         if(std::any_of(dims.begin(), dims.end(), is_padding_area))
         {
-            dst[idx] = T(0);
+            switch(mode)
+            {
+                case PaddingMode::CONSTANT:
+                    const_value.get(dst[idx]);
+                    break;
+                case PaddingMode::REFLECT:
+                {
+                    const Coordinates orig_coords{ orig_coord_reflect(0),
+                                             orig_coord_reflect(1),
+                                             orig_coord_reflect(2),
+                                             orig_coord_reflect(3),
+                                             orig_coord_reflect(4),
+                                             orig_coord_reflect(5) };
+
+                    const size_t idx_src = coord2index(orig_shape, orig_coords);
+                    dst[idx]             = src[idx_src];
+                    break;
+                }
+                case PaddingMode::SYMMETRIC:
+                {
+                    const Coordinates orig_coords{ orig_coord_symm(0),
+                                             orig_coord_symm(1),
+                                             orig_coord_symm(2),
+                                             orig_coord_symm(3),
+                                             orig_coord_symm(4),
+                                             orig_coord_symm(5) };
+
+                    const size_t idx_src = coord2index(orig_shape, orig_coords);
+                    dst[idx]             = src[idx_src];
+                    break;
+                }
+                default:
+                    ARM_COMPUTE_ERROR("Padding mode not supported.");
+                    break;
+            }
         }
         else
         {
             // If the tuple[i,j,k,l,m] is not in the padding area, then copy the input into the output
 
-            Coordinates orig_coords{ i - paddings_extended[0].first,
+            const Coordinates orig_coords{ i - paddings_extended[0].first,
                                      j - paddings_extended[1].first,
                                      k - paddings_extended[2].first,
                                      l - paddings_extended[3].first,
@@ -96,13 +162,13 @@
     return dst;
 }
 
-template SimpleTensor<float> pad_layer(const SimpleTensor<float> &src, const PaddingList &paddings);
-template SimpleTensor<half> pad_layer(const SimpleTensor<half> &src, const PaddingList &paddings);
-template SimpleTensor<uint32_t> pad_layer(const SimpleTensor<uint32_t> &src, const PaddingList &paddings);
-template SimpleTensor<uint8_t> pad_layer(const SimpleTensor<uint8_t> &src, const PaddingList &paddings);
-template SimpleTensor<int8_t> pad_layer(const SimpleTensor<int8_t> &src, const PaddingList &paddings);
-template SimpleTensor<uint16_t> pad_layer(const SimpleTensor<uint16_t> &src, const PaddingList &paddings);
-template SimpleTensor<int16_t> pad_layer(const SimpleTensor<int16_t> &src, const PaddingList &paddings);
+template SimpleTensor<float> pad_layer(const SimpleTensor<float> &src, const PaddingList &paddings, const PixelValue const_value = PixelValue(), const PaddingMode mode);
+template SimpleTensor<half> pad_layer(const SimpleTensor<half> &src, const PaddingList &paddings, const PixelValue const_value = PixelValue(), const PaddingMode mode);
+template SimpleTensor<uint32_t> pad_layer(const SimpleTensor<uint32_t> &src, const PaddingList &paddings, const PixelValue const_value = PixelValue(), const PaddingMode mode);
+template SimpleTensor<uint8_t> pad_layer(const SimpleTensor<uint8_t> &src, const PaddingList &paddings, const PixelValue const_value = PixelValue(), const PaddingMode mode);
+template SimpleTensor<int8_t> pad_layer(const SimpleTensor<int8_t> &src, const PaddingList &paddings, const PixelValue const_value = PixelValue(), const PaddingMode mode);
+template SimpleTensor<uint16_t> pad_layer(const SimpleTensor<uint16_t> &src, const PaddingList &paddings, const PixelValue const_value = PixelValue(), const PaddingMode mode);
+template SimpleTensor<int16_t> pad_layer(const SimpleTensor<int16_t> &src, const PaddingList &paddings, const PixelValue const_value = PixelValue(), const PaddingMode mode);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/PadLayer.h b/tests/validation/reference/PadLayer.h
index 9406b05..5ebb5ed 100644
--- a/tests/validation/reference/PadLayer.h
+++ b/tests/validation/reference/PadLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -39,13 +39,15 @@
 /** Reference function to pad an ND tensor. This function is not supposed to be optimized, but to
  * clearly and naively execute the padding of a tensor
  *
- * @param[in] src      Tensor to pad
- * @param[in] paddings Padding size in each dimension
+ * @param[in] src         Tensor to pad
+ * @param[in] paddings    Padding size in each dimension
+ * @param[in] const_value Constant value to fill padding with
+ * @param[in] mode        [optional] Padding mode to use
  *
  * @return The padded Tensor
  */
 template <typename T>
-SimpleTensor<T> pad_layer(const SimpleTensor<T> &src, const PaddingList &paddings);
+SimpleTensor<T> pad_layer(const SimpleTensor<T> &src, const PaddingList &paddings, const PixelValue const_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/Permute.cpp b/tests/validation/reference/Permute.cpp
index 29c3c5c..619a787 100644
--- a/tests/validation/reference/Permute.cpp
+++ b/tests/validation/reference/Permute.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,11 +47,11 @@
     // Compute reference
     for(int i = 0; i < src.num_elements(); ++i)
     {
-        Coordinates coord = index2coord(src.shape(), i);
-        permute(coord, perm);
-        const size_t dst_index = coord2index(dst.shape(), coord);
+        const Coordinates src_coords = index2coord(src.shape(), i);
+        Coordinates       dst_coords = src_coords;
+        permute(dst_coords, perm);
 
-        dst[dst_index] = src[i];
+        std::copy_n(static_cast<const T *>(src(src_coords)), src.num_channels(), static_cast<T *>(dst(dst_coords)));
     }
 
     return dst;
diff --git a/tests/validation/reference/PixelWiseMultiplication.cpp b/tests/validation/reference/PixelWiseMultiplication.cpp
index d86f8aa..ea058ec 100644
--- a/tests/validation/reference/PixelWiseMultiplication.cpp
+++ b/tests/validation/reference/PixelWiseMultiplication.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -18,7 +18,7 @@
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * dst OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
 #include "PixelWiseMultiplication.h"
@@ -139,7 +139,9 @@
         ARM_COMPUTE_ERROR("Scale of pixel-wise multiplication must be non-negative");
     }
 
-    Coordinates id_src1, id_src2, id_dst;
+    Coordinates id_src1{};
+    Coordinates id_src2{};
+    Coordinates id_dst{};
 
     BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(src1, src2, dst, scale, convert_policy, rounding_policy, id_src1, id_src2, id_dst);
 
@@ -166,7 +168,9 @@
             ARM_COMPUTE_ERROR("Scale of pixel-wise multiplication must be non-negative");
         }
 
-        Coordinates id_src1, id_src2, id_dst;
+        Coordinates id_src1{};
+        Coordinates id_src2{};
+        Coordinates id_dst{};
         BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(src1, src2, dst, scale, convert_policy, rounding_policy, id_src1, id_src2, id_dst);
     }
     return dst;
diff --git a/tests/validation/reference/PoolingLayer.cpp b/tests/validation/reference/PoolingLayer.cpp
index e617c93..f4112a4 100644
--- a/tests/validation/reference/PoolingLayer.cpp
+++ b/tests/validation/reference/PoolingLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -38,8 +38,9 @@
 using namespace arm_compute::misc::shape_calculator;
 
 template <typename T>
-SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, const PoolingLayerInfo &info)
+SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo)
 {
+    ARM_COMPUTE_UNUSED(output_qinfo); // requantization occurs in pooling_layer<uint8_t>
     ARM_COMPUTE_ERROR_ON(info.is_global_pooling() && (src.shape().x() != src.shape().y()));
 
     // Create reference
@@ -152,16 +153,16 @@
 }
 
 template <>
-SimpleTensor<uint8_t> pooling_layer<uint8_t>(const SimpleTensor<uint8_t> &src, const PoolingLayerInfo &info)
+SimpleTensor<uint8_t> pooling_layer<uint8_t>(const SimpleTensor<uint8_t> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo)
 {
     SimpleTensor<float>   src_tmp = convert_from_asymmetric(src);
-    SimpleTensor<float>   dst_tmp = pooling_layer<float>(src_tmp, info);
-    SimpleTensor<uint8_t> dst     = convert_to_asymmetric(dst_tmp, src.quantization_info());
+    SimpleTensor<float>   dst_tmp = pooling_layer<float>(src_tmp, info, output_qinfo);
+    SimpleTensor<uint8_t> dst     = convert_to_asymmetric(dst_tmp, output_qinfo);
     return dst;
 }
 
-template SimpleTensor<float> pooling_layer(const SimpleTensor<float> &src, const PoolingLayerInfo &info);
-template SimpleTensor<half> pooling_layer(const SimpleTensor<half> &src, const PoolingLayerInfo &info);
+template SimpleTensor<float> pooling_layer(const SimpleTensor<float> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo);
+template SimpleTensor<half> pooling_layer(const SimpleTensor<half> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/PoolingLayer.h b/tests/validation/reference/PoolingLayer.h
index 0097789..1c0b7ff 100644
--- a/tests/validation/reference/PoolingLayer.h
+++ b/tests/validation/reference/PoolingLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,7 +36,7 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, const PoolingLayerInfo &info);
+SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/QuantizationLayer.cpp b/tests/validation/reference/QuantizationLayer.cpp
index d7ce490..2f33481 100644
--- a/tests/validation/reference/QuantizationLayer.cpp
+++ b/tests/validation/reference/QuantizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -33,54 +33,25 @@
 {
 namespace reference
 {
-template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type>
-SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<T> &src)
+template <typename T>
+SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<T> &src, const QuantizationInfo quantization_info)
 {
     // Create reference
-    SimpleTensor<uint8_t> dst{ src.shape(), DataType::U8 };
+    SimpleTensor<uint8_t> dst{ src.shape(), DataType::QASYMM8, 1, quantization_info };
 
-    const int width       = src.shape().x();
-    const int height      = src.shape().y();
-    const int depth       = src.shape().z();
-    const int stride_w    = width * height * depth;
-    const int num_batches = src.shape().total_size_upper(3);
-
-    for(int k = 0; k < num_batches; ++k)
+    for(int i = 0; i < src.num_elements(); ++i)
     {
-        // Compute min and max of the 3D tensor
-        float min = src[k * stride_w];
-        float max = src[k * stride_w];
-
-        // Look for min and max values
-        for(int i = 1; i < stride_w; ++i)
-        {
-            float val = src[i + k * stride_w];
-            min       = std::min(min, val);
-            max       = std::max(max, val);
-        }
-
-        // Saturate the result in case min = max
-        if(min == max)
-        {
-            min = 0.0f;
-            max = 1.0f;
-        }
-
-        const float range = max - min;
-
-        for(int i = 0; i < stride_w; ++i)
-        {
-            // map values to range [0.0, 1.0]
-            float       val        = src[i + k * stride_w];
-            const float normalized = (val - min) / range;
-            dst[i + k * stride_w]  = static_cast<uint8_t>(std::min(255.0f, normalized * 256.0f));
-        }
+#ifdef __aarch64__
+        dst[i] = quantization_info.quantize((src[i]), RoundingPolicy::TO_NEAREST_EVEN);
+#else  // __aarch64__
+        dst[i] = quantization_info.quantize((src[i]), RoundingPolicy::TO_ZERO);
+#endif // __aarch64__
     }
-
     return dst;
 }
 
-template SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<float> &src);
+template SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<half> &src, const QuantizationInfo quantization_info);
+template SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<float> &src, const QuantizationInfo quantization_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/QuantizationLayer.h b/tests/validation/reference/QuantizationLayer.h
index 7c5572c..2d13690 100644
--- a/tests/validation/reference/QuantizationLayer.h
+++ b/tests/validation/reference/QuantizationLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -35,8 +35,8 @@
 {
 namespace reference
 {
-template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0>
-SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<T> &src);
+template <typename T>
+SimpleTensor<uint8_t> quantization_layer(const SimpleTensor<T> &src, const QuantizationInfo quantization_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/SliceOperations.cpp b/tests/validation/reference/SliceOperations.cpp
index 40ca9de..b34afdc 100644
--- a/tests/validation/reference/SliceOperations.cpp
+++ b/tests/validation/reference/SliceOperations.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -103,7 +103,9 @@
     SimpleTensor<T> dst{ dst_shape, src.data_type(), 1 };
 
     // Get coordinates
-    Coordinates starts_abs, ends_abs, final_strides;
+    Coordinates starts_abs{};
+    Coordinates ends_abs{};
+    Coordinates final_strides{};
     std::tie(starts_abs, ends_abs, final_strides) = calculate_strided_slice_coords(src_shape,
                                                                                    starts, ends, strides,
                                                                                    begin_mask, end_mask, shrink_axis_mask);
diff --git a/tests/validation/reference/Sobel.cpp b/tests/validation/reference/Sobel.cpp
index ff0e11d..233f1ad 100644
--- a/tests/validation/reference/Sobel.cpp
+++ b/tests/validation/reference/Sobel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
diff --git a/tests/validation/reference/StackLayer.cpp b/tests/validation/reference/StackLayer.cpp
index 50e440c..9e9e434 100644
--- a/tests/validation/reference/StackLayer.cpp
+++ b/tests/validation/reference/StackLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,11 +56,11 @@
     // i_coordinates[0] = xi, i_coordinates[1] = yi, i_coordinates[2] = zi
     // i_coordinates[3] = bi, i_coordinates[4] = i, i_coordinates[5] = 0
     // i_coordinates[5] will be always zero and used for not incrementing the output when the input has less than 4 dimensions
-    int i_coordinates[6] = { 0 };
+    std::array<int, 6> i_coordinates{ 0 };
 
     // Array of pointers used to map the output coordinates to the input ones accordingly with the axis
     // This array is initialized with &i_coordinates[5] since this will be always zero
-    int *o_coordinates[5] = { &i_coordinates[5], &i_coordinates[5], &i_coordinates[5], &i_coordinates[5], &i_coordinates[5] };
+    std::array<int *, 5> o_coordinates = { &i_coordinates[5], &i_coordinates[5], &i_coordinates[5], &i_coordinates[5], &i_coordinates[5] };
 
     // Set the axis coordinate
     o_coordinates[axis] = &i_coordinates[4];
diff --git a/tests/validation/reference/Winograd.cpp b/tests/validation/reference/Winograd.cpp
index 294993b..47f5ac7 100644
--- a/tests/validation/reference/Winograd.cpp
+++ b/tests/validation/reference/Winograd.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,7 +45,7 @@
 void initialize_matrix_transform(SimpleTensor<T> &src, const Size2D &output_tile_size, const Size2D &kernel_size, WinogradTransformType winograd_transform_type)
 {
     // Winograd input transform matrices
-    static const float imatrix2x2_3x3[] =
+    static const std::array<float, 16> imatrix2x2_3x3 =
     {
         1.0f, 0.0f, -1.0f, 0.0f,
         0.0f, 1.0f, 1.0f, 0.0f,
@@ -53,7 +53,7 @@
         0.0f, 1.0f, 0.0f, -1.0f
     };
 
-    static const float imatrix4x4_3x3[] =
+    static const std::array<float, 36> imatrix4x4_3x3 =
     {
         4.0f, 0.0f, -5.0f, 0.0f, 1.0f, 0.0f,
         0.0f, -4.0f, -4.0f, 1.0f, 1.0f, 0.0f,
@@ -63,7 +63,7 @@
         0.0f, 4.0f, 0.0f, -5.0f, 0.0f, 1.0f,
     };
 
-    static const float imatrix4x4_5x5[] =
+    static const std::array<float, 64> imatrix4x4_5x5 =
     {
         1.f, 0.f, -21.f / 4.f, 0.f, 21.f / 4.f, 0.f, -1.f, 0.f,
         0.f, 1.f, 1.f, -17.f / 4.f, -17.f / 4.f, 1.f, 1.f, 0.f,
@@ -75,7 +75,7 @@
         0.f, -1.f, 0.f, 21.f / 4.f, 0.f, -21.f / 4.f, 0.f, 1.f
     };
 
-    static const float imatrix2x1_7x7[] =
+    static const std::array<float, 64> imatrix2x1_7x7 =
     {
         -36.0f, 0.0f, 49.0f, 0.0f, -14.0f, 0.0f, 1.0f, 0.0f,
         0.0f, -36.0f, 36.0f, 13.0f, -13.0f, -1.0f, 1.0f, 0.0f,
@@ -90,7 +90,7 @@
     // ------------------------------------------
 
     // Winograd filter transform matrices
-    static const float fmatrix2x2_3x3[] =
+    static const std::array<float, 12> fmatrix2x2_3x3 =
     {
         1.0f, 0.0f, 0.0f,
         0.5f, 0.5f, 0.5f,
@@ -98,7 +98,7 @@
         0.0f, 0.0f, 1.0f
     };
 
-    static const float fmatrix4x4_3x3[] =
+    static const std::array<float, 18> fmatrix4x4_3x3 =
     {
         0.25f, 0.0f, 0.0f,
         -1.0f / 6.0f, -1.0f / 6.0f, -1.0f / 6.0f,
@@ -108,7 +108,7 @@
         0.0f, 0.0f, 1.0f
     };
 
-    static const float fmatrix4x4_5x5[] =
+    static const std::array<float, 40> fmatrix4x4_5x5 =
     {
         1.0f, 0.0f, 0.0f, 0.0f, 0.0f,
         -2.0f / 9.0f, -2.0f / 9.0f, -2.0f / 9.0f, -2.0f / 9.0f, -2.0f / 9.0f,
@@ -121,7 +121,7 @@
 
     };
 
-    static const float fmatrix2x1_7x7[] =
+    static const std::array<float, 56> fmatrix2x1_7x7 =
     {
         -1.0f / 36.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
         1.0f / 48.0f, -1.0f / 48.0f, 1.0f / 48.0f, -1.0f / 48.0f, 1.0f / 48.0f, -1.0f / 48.0f, 1.0f / 48.0f,
@@ -136,13 +136,13 @@
     // ------------------------------------------
 
     // Winograd output transform matrices
-    static const float omatrix2x2_3x3[] =
+    static const std::array<float, 8> omatrix2x2_3x3 =
     {
         1.0f, 1.0f, 1.0f, 0.0f,
         0.0f, 1.0f, -1.0f, -1.0f
     };
 
-    static const float omatrix4x4_3x3[] =
+    static const std::array<float, 24> omatrix4x4_3x3 =
     {
         1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f,
         0.0f, 1.0f, -1.0f, 2.0f, -2.0f, 0.0f,
@@ -150,7 +150,7 @@
         0.0f, 1.0f, -1.0f, 8.0f, -8.0f, 1.0f
     };
 
-    static const float omatrix4x4_5x5[] =
+    static const std::array<float, 36> omatrix4x4_5x5 =
     {
         1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 8.0f, 8.0f, 0.0f,
         0.0f, 1.0f, -1.0f, 2.0f, -2.0f, 4.0f, -4.0f, 0.0f,
@@ -158,7 +158,7 @@
         0.0f, 1.0f, -1.0f, 8.0f, -8.0f, 1.0f, -1.0f, 1.0f
     };
 
-    static const float omatrix2x1_7x7[] =
+    static const std::array<float, 16> omatrix2x1_7x7 =
     {
         1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f,
         0.0f, -1.0f, 1.0f, -2.0f, 2.0f, -3.0f, 3.0f, 1.0f
@@ -171,39 +171,42 @@
     // Key = (Output tile size, Kernel size, Winograd transform type)
     static std::map<WinogradKey, const float *> matrix_map =
     {
-        { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::INPUT), imatrix2x2_3x3 },
-        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::INPUT), imatrix4x4_3x3 },
-        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::INPUT), imatrix2x2_3x3 },
-        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1), WinogradTransformType::INPUT), imatrix4x4_3x3 },
-        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::INPUT), imatrix2x2_3x3 },
-        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::INPUT), imatrix4x4_3x3 },
-        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::INPUT), imatrix4x4_5x5 },
-        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1), WinogradTransformType::INPUT), imatrix4x4_5x5 },
-        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(7, 1), WinogradTransformType::INPUT), imatrix2x1_7x7 },
-        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 7), WinogradTransformType::INPUT), imatrix2x1_7x7 },
-        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5), WinogradTransformType::INPUT), imatrix4x4_5x5 },
-        { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix2x2_3x3 },
-        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix4x4_3x3 },
-        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::FILTER), fmatrix2x2_3x3 },
-        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1), WinogradTransformType::FILTER), fmatrix4x4_3x3 },
-        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::FILTER), fmatrix2x2_3x3 },
-        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::FILTER), fmatrix4x4_3x3 },
-        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::FILTER), fmatrix4x4_5x5 },
-        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1), WinogradTransformType::FILTER), fmatrix4x4_5x5 },
-        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(7, 1), WinogradTransformType::FILTER), fmatrix2x1_7x7 },
-        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 7), WinogradTransformType::FILTER), fmatrix2x1_7x7 },
-        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5), WinogradTransformType::FILTER), fmatrix4x4_5x5 },
-        { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::OUTPUT), omatrix2x2_3x3 },
-        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::OUTPUT), omatrix4x4_3x3 },
-        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::OUTPUT), omatrix2x2_3x3 },
-        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1), WinogradTransformType::OUTPUT), omatrix4x4_3x3 },
-        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::OUTPUT), omatrix2x2_3x3 },
-        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::OUTPUT), omatrix4x4_3x3 },
-        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::OUTPUT), omatrix4x4_5x5 },
-        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1), WinogradTransformType::OUTPUT), omatrix4x4_5x5 },
-        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(7, 1), WinogradTransformType::OUTPUT), omatrix2x1_7x7 },
-        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 7), WinogradTransformType::OUTPUT), omatrix2x1_7x7 },
-        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5), WinogradTransformType::OUTPUT), omatrix4x4_5x5 },
+        { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::INPUT), imatrix2x2_3x3.data() },
+        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::INPUT), imatrix4x4_3x3.data() },
+        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::INPUT), imatrix2x2_3x3.data() },
+        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1), WinogradTransformType::INPUT), imatrix4x4_3x3.data() },
+        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::INPUT), imatrix2x2_3x3.data() },
+        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::INPUT), imatrix4x4_3x3.data() },
+        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::INPUT), imatrix4x4_5x5.data() },
+        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1), WinogradTransformType::INPUT), imatrix4x4_5x5.data() },
+        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(7, 1), WinogradTransformType::INPUT), imatrix2x1_7x7.data() },
+        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 7), WinogradTransformType::INPUT), imatrix2x1_7x7.data() },
+        { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(7, 7), WinogradTransformType::INPUT), imatrix2x1_7x7.data() },
+        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5), WinogradTransformType::INPUT), imatrix4x4_5x5.data() },
+        { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix2x2_3x3.data() },
+        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::FILTER), fmatrix4x4_3x3.data() },
+        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::FILTER), fmatrix2x2_3x3.data() },
+        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1), WinogradTransformType::FILTER), fmatrix4x4_3x3.data() },
+        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::FILTER), fmatrix2x2_3x3.data() },
+        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::FILTER), fmatrix4x4_3x3.data() },
+        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::FILTER), fmatrix4x4_5x5.data() },
+        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1), WinogradTransformType::FILTER), fmatrix4x4_5x5.data() },
+        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(7, 1), WinogradTransformType::FILTER), fmatrix2x1_7x7.data() },
+        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 7), WinogradTransformType::FILTER), fmatrix2x1_7x7.data() },
+        { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(7, 7), WinogradTransformType::FILTER), fmatrix2x1_7x7.data() },
+        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5), WinogradTransformType::FILTER), fmatrix4x4_5x5.data() },
+        { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(3, 3), WinogradTransformType::OUTPUT), omatrix2x2_3x3.data() },
+        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3), WinogradTransformType::OUTPUT), omatrix4x4_3x3.data() },
+        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(3, 1), WinogradTransformType::OUTPUT), omatrix2x2_3x3.data() },
+        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(3, 1), WinogradTransformType::OUTPUT), omatrix4x4_3x3.data() },
+        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 3), WinogradTransformType::OUTPUT), omatrix2x2_3x3.data() },
+        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 3), WinogradTransformType::OUTPUT), omatrix4x4_3x3.data() },
+        { WinogradKey(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5), WinogradTransformType::OUTPUT), omatrix4x4_5x5.data() },
+        { WinogradKey(std::pair<int, int>(4, 1), std::pair<int, int>(5, 1), WinogradTransformType::OUTPUT), omatrix4x4_5x5.data() },
+        { WinogradKey(std::pair<int, int>(2, 1), std::pair<int, int>(7, 1), WinogradTransformType::OUTPUT), omatrix2x1_7x7.data() },
+        { WinogradKey(std::pair<int, int>(1, 2), std::pair<int, int>(1, 7), WinogradTransformType::OUTPUT), omatrix2x1_7x7.data() },
+        { WinogradKey(std::pair<int, int>(2, 2), std::pair<int, int>(7, 7), WinogradTransformType::OUTPUT), omatrix2x1_7x7.data() },
+        { WinogradKey(std::pair<int, int>(1, 4), std::pair<int, int>(1, 5), WinogradTransformType::OUTPUT), omatrix4x4_5x5.data() },
     };
 
     // Find transformation matrix