arm_compute v17.12
diff --git a/tests/validation/CL/AbsoluteDifference.cpp b/tests/validation/CL/AbsoluteDifference.cpp
new file mode 100644
index 0000000..cabb76f
--- /dev/null
+++ b/tests/validation/CL/AbsoluteDifference.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLAbsoluteDifference.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ConvertPolicyDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/AbsoluteDifferenceFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Input data sets **/
+const auto AbsoluteDifferenceU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8)), framework::dataset::make("DataType",
+                                                 DataType::U8));
+const auto AbsoluteDifferenceS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
+                                                  framework::dataset::make("DataType", DataType::S16));
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(AbsoluteDifference)
+
+template <typename T>
+using CLAbsoluteDifferenceFixture = AbsoluteDifferenceValidationFixture<CLTensor, CLAccessor, CLAbsoluteDifference, T>;
+
+TEST_SUITE(U8)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), AbsoluteDifferenceU8Dataset),
+               shape, data_type0, data_type1, output_data_type)
+{
+    // Create tensors
+    CLTensor ref_src1 = create_tensor<CLTensor>(shape, data_type0);
+    CLTensor ref_src2 = create_tensor<CLTensor>(shape, data_type1);
+    CLTensor dst      = create_tensor<CLTensor>(shape, output_data_type);
+
+    // Create and Configure function
+    CLAbsoluteDifference abs_diff;
+    abs_diff.configure(&ref_src1, &ref_src2, &dst);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(ref_src1.info()->padding(), padding);
+    validate(ref_src2.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLAbsoluteDifferenceFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), AbsoluteDifferenceU8Dataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLAbsoluteDifferenceFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AbsoluteDifferenceU8Dataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(S16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), AbsoluteDifferenceS16Dataset),
+               shape, data_type0, data_type1, output_data_type)
+{
+    // Create tensors
+    CLTensor ref_src1 = create_tensor<CLTensor>(shape, data_type0);
+    CLTensor ref_src2 = create_tensor<CLTensor>(shape, data_type1);
+    CLTensor dst      = create_tensor<CLTensor>(shape, output_data_type);
+
+    // Create and Configure function
+    CLAbsoluteDifference abs_diff;
+    abs_diff.configure(&ref_src1, &ref_src2, &dst);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(ref_src1.info()->padding(), padding);
+    validate(ref_src2.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLAbsoluteDifferenceFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), AbsoluteDifferenceS16Dataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLAbsoluteDifferenceFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AbsoluteDifferenceS16Dataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
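For context, a minimal runtime sketch (not part of the patch) of how the CLAbsoluteDifference function exercised by the tests above is typically driven. The 640x480 U8 shapes and the default_init() setup are illustrative assumptions, not taken from the test code.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLAbsoluteDifference.h"

    using namespace arm_compute;

    int main()
    {
        // Set up the OpenCL context and queue shared by all CL functions
        CLScheduler::get().default_init();

        // Two U8 inputs and a U8 output of the same shape (shape is an assumption)
        CLTensor input1, input2, output;
        input1.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));
        input2.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));
        output.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));

        // Configure before allocating so the function can request any padding it needs
        CLAbsoluteDifference abs_diff;
        abs_diff.configure(&input1, &input2, &output);

        input1.allocator()->allocate();
        input2.allocator()->allocate();
        output.allocator()->allocate();

        // Enqueue the kernel and wait for the queue to drain
        abs_diff.run();
        CLScheduler::get().sync();

        return 0;
    }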
diff --git a/tests/validation/CL/Accumulate.cpp b/tests/validation/CL/Accumulate.cpp
new file mode 100644
index 0000000..2e33540
--- /dev/null
+++ b/tests/validation/CL/Accumulate.cpp
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLAccumulate.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ConvertPolicyDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/AccumulateFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Tolerance value for comparing reference's output against implementation's output for floating point data types */
+constexpr AbsoluteTolerance<float> tolerance(1.0f);
+/** Input data sets **/
+const auto AccumulateU8Dataset  = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8));
+const auto AccumulateS16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16));
+} // namespace
+TEST_SUITE(CL)
+TEST_SUITE(Accumulate)
+
+TEST_SUITE(U8)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), AccumulateS16Dataset),
+               shape, data_type, output_data_type)
+{
+    // Create tensors
+    CLTensor ref_src = create_tensor<CLTensor>(shape, data_type);
+    CLTensor dst     = create_tensor<CLTensor>(shape, output_data_type);
+
+    // Create and Configure function
+    CLAccumulate accum;
+    accum.configure(&ref_src, &dst);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(ref_src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+template <typename T1>
+using CLAccumulateFixture = AccumulateValidationFixture<CLTensor, CLAccessor, CLAccumulate, T1, int16_t>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLAccumulateFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), AccumulateS16Dataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLAccumulateFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AccumulateS16Dataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE(AccumulateWeighted)
+
+TEST_SUITE(U8)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), AccumulateU8Dataset),
+               shape, data_type, output_data_type)
+{
+    // Generate a random alpha value
+    std::mt19937                     gen(library->seed());
+    std::uniform_real_distribution<> float_dist(0, 1);
+    const float                      alpha = float_dist(gen);
+
+    // Create tensors
+    CLTensor ref_src = create_tensor<CLTensor>(shape, data_type);
+    CLTensor dst     = create_tensor<CLTensor>(shape, output_data_type);
+
+    // Create and Configure function
+    CLAccumulateWeighted accum_weight;
+    accum_weight.configure(&ref_src, alpha, &dst);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(ref_src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+template <typename T1>
+using CLAccumulateWeightedFixture = AccumulateWeightedValidationFixture<CLTensor, CLAccessor, CLAccumulateWeighted, T1, uint8_t>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLAccumulateWeightedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), AccumulateU8Dataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLAccumulateWeightedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AccumulateU8Dataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE(AccumulateSquared)
+
+TEST_SUITE(U8)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), AccumulateS16Dataset),
+               shape, data_type, output_data_type)
+{
+    // Generate a random shift value
+    std::mt19937                            gen(library->seed());
+    std::uniform_int_distribution<uint32_t> int_dist(0, 15);
+    const uint32_t                          shift = int_dist(gen);
+
+    // Create tensors
+    CLTensor ref_src = create_tensor<CLTensor>(shape, data_type);
+    CLTensor dst     = create_tensor<CLTensor>(shape, output_data_type);
+
+    // Create and Configure function
+    CLAccumulateSquared accum_square;
+    accum_square.configure(&ref_src, shift, &dst);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(ref_src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+template <typename T1>
+using CLAccumulateSquaredFixture = AccumulateSquaredValidationFixture<CLTensor, CLAccessor, CLAccumulateSquared, T1, int16_t>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLAccumulateSquaredFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), AccumulateS16Dataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLAccumulateSquaredFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AccumulateS16Dataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
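Similarly, a hedged sketch of driving the accumulation functions validated above: a U8 frame accumulated into an S16 accumulator with plain CLAccumulate. The shape is an assumption; CLAccumulateWeighted and CLAccumulateSquared follow the same pattern but take an extra alpha/shift argument (with a U8 accumulator in the weighted case).

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLAccumulate.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        // U8 input frame, S16 accumulator (320x240 is an assumed shape)
        CLTensor frame, accum;
        frame.allocator()->init(TensorInfo(TensorShape(320U, 240U), 1, DataType::U8));
        accum.allocator()->init(TensorInfo(TensorShape(320U, 240U), 1, DataType::S16));

        // accum += frame
        CLAccumulate accumulate;
        accumulate.configure(&frame, &accum);

        frame.allocator()->allocate();
        accum.allocator()->allocate();

        accumulate.run();
        CLScheduler::get().sync();

        return 0;
    }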
diff --git a/tests/validation/CL/ActivationLayer.cpp b/tests/validation/CL/ActivationLayer.cpp
index 83bd2d0..5ceaeca 100644
--- a/tests/validation/CL/ActivationLayer.cpp
+++ b/tests/validation/CL/ActivationLayer.cpp
@@ -156,6 +156,47 @@
     }
 }
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),     // Mismatching data types
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),     // Window shrink
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8), // Unsupported activation
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 2),  // Mismatching fixed point
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                     }),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8),
+                                                       TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                       TensorInfo(),
+                                                     })),
+               framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU),
+                                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                                          })),
+               framework::dataset::make("Expected", { false, false, true, true, false, false, false, true, true })),
+               input_info, output_info, act_info, expected)
+{
+    ARM_COMPUTE_EXPECT(bool(CLActivationLayer::validate(&input_info.clone()->set_is_resizable(false), (output_info.total_size() == 0) ? nullptr : &output_info.clone()->set_is_resizable(false), act_info)) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
 template <typename T>
 using CLActivationLayerFixture = ActivationValidationFixture<CLTensor, CLAccessor, CLActivationLayer, T>;
 
@@ -196,7 +237,7 @@
 template <typename T>
 using CLActivationLayerFixedPointFixture = ActivationValidationFixedPointFixture<CLTensor, CLAccessor, CLActivationLayer, T>;
 
-TEST_SUITE(Quantized)
+TEST_SUITE(FixedPoint)
 TEST_SUITE(QS8)
 // We test for fixed point precision [3,5] because [1,2] and [6,7] ranges cause
 // overflowing issues in most of the transcendentals functions.
@@ -239,6 +280,34 @@
 TEST_SUITE_END()
 TEST_SUITE_END()
 
+template <typename T>
+using CLActivationLayerQuantizedFixture = ActivationValidationQuantizedFixture<CLTensor, CLAccessor, CLActivationLayer, T>;
+
+/** Input data sets. */
+const auto QuantizedActivationDataset = combine(combine(framework::dataset::make("InPlace", { false, true }), framework::dataset::make("ActivationFunction", { ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU })),
+                                                framework::dataset::make("AlphaBeta", { 0.5f, 1.f }));
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLActivationLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), QuantizedActivationDataset),
+                                                                                                                        framework::dataset::make("DataType",
+                                                                                                                                DataType::QASYMM8)),
+                                                                                                                        framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.1f, 128.0f) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance(_function, _data_type));
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLActivationLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), QuantizedActivationDataset),
+                                                                                                                      framework::dataset::make("DataType",
+                                                                                                                              DataType::QASYMM8)),
+                                                                                                                      framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.1f, 128.0f) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance(_function, _data_type));
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
 TEST_SUITE_END()
 TEST_SUITE_END()
 } // namespace validation
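The new Validate data test above exercises the static CLActivationLayer::validate() entry point on bare TensorInfo objects. A rough sketch of using the same check outside the test framework (shapes are assumptions; as in the test body, the returned status converts to true when no error is reported):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/functions/CLActivationLayer.h"

    using namespace arm_compute;

    bool relu_configuration_is_supported()
    {
        // No allocation happens here: validation works on tensor metadata only
        const TensorInfo input(TensorShape(27U, 13U, 2U), 1, DataType::F32);
        const TensorInfo output(TensorShape(27U, 13U, 2U), 1, DataType::F32);

        const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);

        // The status converts to true when the configuration is supported
        const auto status = CLActivationLayer::validate(&input, &output, act_info);
        return bool(status);
    }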
diff --git a/tests/validation/CL/ArithmeticAddition.cpp b/tests/validation/CL/ArithmeticAddition.cpp
index 51257b4..787b1b9 100644
--- a/tests/validation/CL/ArithmeticAddition.cpp
+++ b/tests/validation/CL/ArithmeticAddition.cpp
@@ -61,6 +61,41 @@
 TEST_SUITE(CL)
 TEST_SUITE(ArithmeticAddition)
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+               framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),      // Window shrink
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid data type combination
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),  // Mismatching fixed point
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                      }),
+               framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
+                                                       TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                     })),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                     })),
+               framework::dataset::make("Expected", { true, true, false, false, false, false, true })),
+               input1_info, input2_info, output_info, expected)
+{
+    ARM_COMPUTE_EXPECT(bool(CLArithmeticAddition::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), ConvertPolicy::WRAP)) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
 template <typename T>
 using CLArithmeticAdditionFixture = ArithmeticAdditionValidationFixture<CLTensor, CLAccessor, CLArithmeticAddition, T>;
 
diff --git a/tests/validation/CL/ArithmeticSubtraction.cpp b/tests/validation/CL/ArithmeticSubtraction.cpp
index 817a31f..34fdb0b 100644
--- a/tests/validation/CL/ArithmeticSubtraction.cpp
+++ b/tests/validation/CL/ArithmeticSubtraction.cpp
@@ -47,8 +47,14 @@
 const auto ArithmeticSubtractionU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8)),
                                                     framework::dataset::make("DataType",
                                                                              DataType::U8));
-const auto ArithmeticSubtractionS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
+const auto ArithmeticSubtractionS16Dataset = combine(combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S16)),
                                                      framework::dataset::make("DataType", DataType::S16));
+const auto ArithmeticSubtractionU8U8S16Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8)),
+                                                         framework::dataset::make("DataType", DataType::S16));
+const auto ArithmeticSubtractionS16U8S16Dataset = combine(combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8)),
+                                                          framework::dataset::make("DataType", DataType::S16));
+const auto ArithmeticSubtractionU8S16S16Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16)),
+                                                          framework::dataset::make("DataType", DataType::S16));
 const auto ArithmeticSubtractionQS8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QS8), framework::dataset::make("DataType", DataType::QS8)),
                                                      framework::dataset::make("DataType", DataType::QS8));
 const auto ArithmeticSubtractionQS16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QS16), framework::dataset::make("DataType", DataType::QS16)),
@@ -62,8 +68,43 @@
 TEST_SUITE(CL)
 TEST_SUITE(ArithmeticSubtraction)
 
-template <typename T>
-using CLArithmeticSubtractionFixture = ArithmeticSubtractionValidationFixture<CLTensor, CLAccessor, CLArithmeticSubtraction, T>;
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+               framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),      // Window shrink
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid data type combination
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),  // Mismatching fixed point
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                      }),
+               framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
+                                                       TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                     })),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                     })),
+               framework::dataset::make("Expected", { true, true, false, false, false, false, true })),
+               input1_info, input2_info, output_info, expected)
+{
+    ARM_COMPUTE_EXPECT(bool(CLArithmeticSubtraction::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), ConvertPolicy::WRAP)) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+template <typename T1, typename T2 = T1, typename T3 = T1>
+using CLArithmeticSubtractionFixture = ArithmeticSubtractionValidationFixture<CLTensor, CLAccessor, CLArithmeticSubtraction, T1, T2, T3>;
 
 TEST_SUITE(U8)
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
@@ -89,7 +130,8 @@
     validate(dst.info()->padding(), padding);
 }
 
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticSubtractionU8Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(),
+                                                                                                                     ArithmeticSubtractionU8Dataset),
                                                                                                                      framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
 {
     // Validate output
@@ -97,14 +139,19 @@
 }
 TEST_SUITE_END()
 
+template <typename T1, typename T2 = T1>
+using CLArithmeticSubtractionToS16Fixture = CLArithmeticSubtractionFixture<T1, T2, int16_t>;
+
 TEST_SUITE(S16)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()),
+                                                                                   framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
+                                                                           framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
                                                                    framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-               shape, data_type, policy)
+               shape, data_type1, data_type2, policy)
 {
     // Create tensors
-    CLTensor ref_src1 = create_tensor<CLTensor>(shape, data_type);
-    CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::S16);
+    CLTensor ref_src1 = create_tensor<CLTensor>(shape, data_type1);
+    CLTensor ref_src2 = create_tensor<CLTensor>(shape, data_type2);
     CLTensor dst      = create_tensor<CLTensor>(shape, DataType::S16);
 
     // Create and Configure function
@@ -121,28 +168,86 @@
     validate(ref_src2.info()->padding(), padding);
     validate(dst.info()->padding(), padding);
 }
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticSubtractionS16Dataset),
-                                                                                                                     framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+TEST_SUITE(S16_S16_S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionToS16Fixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticSubtractionS16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ArithmeticSubtractionS16Dataset),
-                                                                                                                   framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticSubtractionToS16Fixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ArithmeticSubtractionS16Dataset),
+                                                                                                                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 TEST_SUITE_END()
 
-template <typename T>
-using CLArithmeticSubtractionFixedPointFixture = ArithmeticSubtractionValidationFixedPointFixture<CLTensor, CLAccessor, CLArithmeticSubtraction, T>;
+TEST_SUITE(U8_U8_S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionToS16Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(),
+                       ArithmeticSubtractionU8U8S16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticSubtractionToS16Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(),
+                                                                                                                        ArithmeticSubtractionU8U8S16Dataset),
+                                                                                                                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(S16_U8_S16)
+using CLAriSubS16U8ToS16Fixture = CLArithmeticSubtractionToS16Fixture<int16_t, uint8_t>;
+FIXTURE_DATA_TEST_CASE(RunSmall, CLAriSubS16U8ToS16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(),
+                                                                                                               ArithmeticSubtractionS16U8S16Dataset),
+                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLAriSubS16U8ToS16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(),
+                                                                                                             ArithmeticSubtractionS16U8S16Dataset),
+                                                                                                     framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(U8_S16_S16)
+using CLAriSubU8S16ToS16Fixture = CLArithmeticSubtractionToS16Fixture<uint8_t, int16_t>;
+FIXTURE_DATA_TEST_CASE(RunSmall, CLAriSubU8S16ToS16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(),
+                                                                                                               ArithmeticSubtractionU8S16S16Dataset),
+                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLAriSubU8S16ToS16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(),
+                                                                                                             ArithmeticSubtractionU8S16S16Dataset),
+                                                                                                     framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+template <typename T1, typename T2 = T1, typename T3 = T1>
+using CLArithmeticSubtractionFixedPointFixture = ArithmeticSubtractionValidationFixedPointFixture<CLTensor, CLAccessor, CLArithmeticSubtraction, T1, T2, T3>;
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QS8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionQS8Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+                       ArithmeticSubtractionQS8Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        framework::dataset::make("FractionalBits", 1, 7)))
 {
@@ -150,7 +255,8 @@
     validate(CLAccessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticSubtractionFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticSubtractionQS8Dataset),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticSubtractionFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+                       ArithmeticSubtractionQS8Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        framework::dataset::make("FractionalBits", 1, 7)))
 {
@@ -169,7 +275,8 @@
     validate(CLAccessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticSubtractionFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticSubtractionQS16Dataset),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticSubtractionFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+                       ArithmeticSubtractionQS16Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        framework::dataset::make("FractionalBits", 1, 15)))
 {
diff --git a/tests/validation/CL/BatchNormalizationLayer.cpp b/tests/validation/CL/BatchNormalizationLayer.cpp
index 69f8d7b..30dd70a 100644
--- a/tests/validation/CL/BatchNormalizationLayer.cpp
+++ b/tests/validation/CL/BatchNormalizationLayer.cpp
@@ -59,7 +59,7 @@
                shape0, shape1, epsilon, dt)
 {
     // Set fixed point position data type allowed
-    int fixed_point_position = (arm_compute::is_data_type_fixed_point(dt)) ? 3 : 0;
+    const int fixed_point_position = (arm_compute::is_data_type_fixed_point(dt)) ? 3 : 0;
 
     // Create tensors
     CLTensor src   = create_tensor<CLTensor>(shape0, dt, 1, fixed_point_position);
@@ -78,6 +78,49 @@
     validate(dst.info()->valid_region(), valid_region);
 }
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Window shrink
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Mismatching data types
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Mismatching data types
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Invalid mean/var/beta/gamma shape
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2), // Mismatching fixed point position
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                     }),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                       TensorInfo(),
+                                                     })),
+               framework::dataset::make("MVBGInfo",{ TensorInfo(TensorShape(2U), 1, DataType::F32),
+                                                     TensorInfo(TensorShape(2U), 1, DataType::F32),
+                                                     TensorInfo(TensorShape(2U), 1, DataType::F16),
+                                                     TensorInfo(TensorShape(2U), 1, DataType::F32),
+                                                     TensorInfo(TensorShape(5U), 1, DataType::F32),
+                                                     TensorInfo(TensorShape(2U), 1, DataType::QS8, 2),
+                                                     TensorInfo(TensorShape(2U), 1, DataType::QS8, 2),
+                                                     TensorInfo(TensorShape(2U), 1, DataType::QS8, 2),
+                                                   })),
+               framework::dataset::make("Expected", { true, false, false, false, false, false, true, true})),
+               input_info, output_info, mvbg_info, expected)
+{
+    const auto &mean_info = mvbg_info;
+    const auto &var_info = mvbg_info;
+    const auto &beta_info = mvbg_info;
+    const auto &gamma_info = mvbg_info;
+    bool has_error = bool(CLBatchNormalizationLayer::validate(&input_info.clone()->set_is_resizable(false), (output_info.total_size() == 0) ? nullptr : &output_info.clone()->set_is_resizable(false), &mean_info.clone()->set_is_resizable(false), &var_info.clone()->set_is_resizable(false), &beta_info.clone()->set_is_resizable(false), &gamma_info.clone()->set_is_resizable(false), 1.f));
+    ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(),
diff --git a/tests/validation/CL/ConvolutionLayer.cpp b/tests/validation/CL/ConvolutionLayer.cpp
index 9f9295c..035e492 100644
--- a/tests/validation/CL/ConvolutionLayer.cpp
+++ b/tests/validation/CL/ConvolutionLayer.cpp
@@ -45,7 +45,8 @@
 {
 RelativeTolerance<float>            tolerance_f32(0.05f);                 /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
 RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
-constexpr AbsoluteTolerance<float>  tolerance_q(1.0f);                    /**< Tolerance value for comparing reference's output against implementation's output for fixed point data types */
+constexpr AbsoluteTolerance<float>  tolerance_fixed(1.0f);                /**< Tolerance value for comparing reference's output against implementation's output for fixed point data types */
+constexpr AbsoluteTolerance<float>  tolerance_qasymm8(0.0);               /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
 constexpr float                     tolerance_num = 0.07f;                /**< Tolerance number */
 
 /** CNN data types */
@@ -55,6 +56,7 @@
     DataType::F32,
     DataType::QS8,
     DataType::QS16,
+    DataType::QASYMM8,
 });
 } // namespace
 
@@ -67,17 +69,22 @@
     // Set fixed point position data type allowed
     int fixed_point_position = is_data_type_fixed_point(data_type) ? 3 : 0;
 
+    auto bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
+
     // Create tensors
-    CLTensor src     = create_tensor<CLTensor>(input_shape, data_type, 1, fixed_point_position);
-    CLTensor weights = create_tensor<CLTensor>(weights_shape, data_type, 1, fixed_point_position);
-    CLTensor bias    = create_tensor<CLTensor>(bias_shape, data_type, 1, fixed_point_position);
-    CLTensor dst     = create_tensor<CLTensor>(output_shape, data_type, 1, fixed_point_position);
+    CLTensor src     = create_tensor<CLTensor>(input_shape, data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127));
+    CLTensor weights = create_tensor<CLTensor>(weights_shape, data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127));
+    CLTensor bias    = create_tensor<CLTensor>(bias_shape, bias_data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127));
+    CLTensor dst     = create_tensor<CLTensor>(output_shape, data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127));
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
+    const QuantizationInfo src_quantization_info     = src.info()->quantization_info();
+    const QuantizationInfo weights_quantization_info = weights.info()->quantization_info();
+
     // Create and configure function
     CLConvolutionLayer conv;
     conv.configure(&src, &weights, &bias, &dst, info);
@@ -92,6 +99,10 @@
     validate(weights.info()->valid_region(), weights_valid_region);
     validate(bias.info()->valid_region(), bias_valid_region);
     validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate QuantizationInfo
+    ARM_COMPUTE_EXPECT(src.info()->quantization_info() == src_quantization_info, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(weights.info()->quantization_info() == weights_quantization_info, framework::LogLevel::ERRORS);
 }
 
 template <typename T>
@@ -140,7 +151,7 @@
 template <typename T>
 using CLConvolutionLayerFixedPointFixture = ConvolutionValidationFixedPointFixture<CLTensor, CLAccessor, CLConvolutionLayer, T>;
 
-TEST_SUITE(Quantized)
+TEST_SUITE(FixedPoint)
 TEST_SUITE(QS8)
 // We test for fixed point precision [4,6]
 FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
@@ -150,7 +161,7 @@
                        framework::dataset::make("FractionalBits", 4, 7)))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_q);
+    validate(CLAccessor(_target), _reference, tolerance_fixed);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeConvolutionLayerDataset(),
                                                                                                                        framework::dataset::make("ReshapeWeights", { true, false })),
@@ -159,7 +170,7 @@
                                                                                                                        framework::dataset::make("FractionalBits", 4, 7)))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_q);
+    validate(CLAccessor(_target), _reference, tolerance_fixed);
 }
 TEST_SUITE_END()
 
@@ -172,7 +183,7 @@
                        framework::dataset::make("FractionalBits", 1, 14)))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_q);
+    validate(CLAccessor(_target), _reference, tolerance_fixed);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeConvolutionLayerDataset(),
                                                                                                                         framework::dataset::make("ReshapeWeights", { true, false })),
@@ -181,7 +192,31 @@
                                                                                                                         framework::dataset::make("FractionalBits", 1, 14)))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_q);
+    validate(CLAccessor(_target), _reference, tolerance_fixed);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+template <typename T>
+using CLConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLConvolutionLayer, T>;
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
+                       framework::dataset::make("ReshapeWeights", { true })),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeConvolutionLayerDataset(),
+                                                                                                                       framework::dataset::make("ReshapeWeights", { true })),
+                                                                                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                                                                                                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 0) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 TEST_SUITE_END()
 TEST_SUITE_END()
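As a side note on the QuantizationInfo(2.f / 255.f, ...) parameters used by the quantized convolution tests above: QASYMM8 values map to real numbers through an affine transform, real = scale * (quantized - offset). A small standalone sketch of that mapping (the sample value is an assumption; no library calls are involved):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        // Parameters taken from the tests above: scale = 2/255, offset = 127
        const float scale  = 2.f / 255.f;
        const int   offset = 127;

        // An arbitrary real-valued sample (assumption for the example)
        const float real = 0.25f;

        // Quantize: divide by the scale, shift by the offset, round and clamp to [0, 255]
        const int     q_unclamped = static_cast<int>(std::lround(real / scale)) + offset;
        const uint8_t q           = static_cast<uint8_t>(std::min(255, std::max(0, q_unclamped)));

        // Dequantize: real ~= scale * (q - offset)
        const float back = scale * (static_cast<int>(q) - offset);

        std::printf("real=%.4f -> q=%d -> dequantized=%.4f\n", real, static_cast<int>(q), back);
        return 0;
    }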
diff --git a/tests/validation/CL/DepthConcatenateLayer.cpp b/tests/validation/CL/DepthConcatenateLayer.cpp
index a7adde1..723cd94 100644
--- a/tests/validation/CL/DepthConcatenateLayer.cpp
+++ b/tests/validation/CL/DepthConcatenateLayer.cpp
@@ -24,7 +24,7 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLDepthConcatenate.h"
+#include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/datasets/ShapeDatasets.h"
 #include "tests/framework/Asserts.h"
@@ -43,7 +43,7 @@
 TEST_SUITE(DepthConcatenateLayer)
 
 template <typename T>
-using CLDepthConcatenateLayerFixture = DepthConcatenateValidationFixture<CLTensor, ICLTensor, CLAccessor, CLDepthConcatenate, T>;
+using CLDepthConcatenateLayerFixture = DepthConcatenateLayerValidationFixture<CLTensor, ICLTensor, CLAccessor, CLDepthConcatenateLayer, T>;
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
@@ -68,7 +68,7 @@
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType",
                                                                                                                  DataType::F32)))
 {
     // Validate output
@@ -86,7 +86,7 @@
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(),
                                                                                                                   framework::dataset::make("DataType",
                                                                                                                           DataType::QS8)))
 {
@@ -103,7 +103,7 @@
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(),
                                                                                                                    framework::dataset::make("DataType",
                                                                                                                            DataType::QS16)))
 {
diff --git a/tests/validation/CL/DepthConvert.cpp b/tests/validation/CL/DepthConvert.cpp
deleted file mode 100644
index 57669f0..0000000
--- a/tests/validation/CL/DepthConvert.cpp
+++ /dev/null
@@ -1,484 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLDepthConvert.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ConvertPolicyDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/DepthConvertFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/** Input data sets **/
-const auto DepthConvertU8toU16Dataset             = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16));
-const auto DepthConvertU8toS16Dataset             = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16));
-const auto DepthConvertU8toS32Dataset             = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32));
-const auto DepthConvertU16toU8Dataset             = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8));
-const auto DepthConvertU16toU32Dataset            = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32));
-const auto DepthConvertS16toU8Dataset             = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8));
-const auto DepthConvertS16toS32Dataset            = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32));
-const auto DepthConvertQS8toFP32Dataset           = combine(framework::dataset::make("DataType", DataType::QS8), framework::dataset::make("DataType", DataType::F32));
-const auto DepthConvertQS16toFP32Dataset          = combine(framework::dataset::make("DataType", DataType::QS16), framework::dataset::make("DataType", DataType::F32));
-const auto DepthConvertFP32toQS8Dataset           = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS8));
-const auto DepthConvertFP32toQS16Dataset          = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS16));
-const auto DepthConvertShiftDataset               = framework::dataset::make("Shift", 0, 7);
-const auto DepthConvertFixedPointQuantizedDataset = framework::dataset::make("FractionalBits", 1, 7);
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(DepthConvert)
-template <typename T>
-using CLDepthConvertToU16Fixture = DepthConvertValidationFixture<CLTensor, CLAccessor, CLDepthConvert, T, uint16_t>;
-template <typename T>
-using CLDepthConvertToS16Fixture = DepthConvertValidationFixture<CLTensor, CLAccessor, CLDepthConvert, T, int16_t>;
-template <typename T>
-using CLDepthConvertToS32Fixture = DepthConvertValidationFixture<CLTensor, CLAccessor, CLDepthConvert, T, int32_t>;
-template <typename T>
-using CLDepthConvertToU8Fixture = DepthConvertValidationFixture<CLTensor, CLAccessor, CLDepthConvert, T, uint8_t>;
-template <typename T>
-using CLDepthConvertToU32Fixture = DepthConvertValidationFixture<CLTensor, CLAccessor, CLDepthConvert, T, uint32_t>;
-template <typename T>
-using CLDepthConvertToFP32FixedPointFixture = DepthConvertValidationFractionalBitsFixture<CLTensor, CLAccessor, CLDepthConvert, T, float>;
-template <typename T>
-using CLDepthConvertToQS8FixedPointFixture = DepthConvertValidationFractionalBitsFixture<CLTensor, CLAccessor, CLDepthConvert, T, int8_t>;
-template <typename T>
-using CLDepthConvertToQS16FixedPointFixture = DepthConvertValidationFractionalBitsFixture<CLTensor, CLAccessor, CLDepthConvert, T, int16_t>;
-
-TEST_SUITE(U8_to_U16)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                   DepthConvertShiftDataset),
-               shape, policy, shift)
-{
-    int fixed_point_position = 0;
-
-    // Create tensors
-    CLTensor src = create_tensor<CLTensor>(shape, DataType::U8, 1, fixed_point_position);
-    CLTensor dst = create_tensor<CLTensor>(shape, DataType::U16, 1, fixed_point_position);
-
-    // Create and Configure function
-    CLDepthConvert depth_convert;
-    depth_convert.configure(&src, &dst, policy, shift);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertToU16Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU8toU16Dataset),
-                                                                                                                 framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                 DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertToU16Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU8toU16Dataset),
-                                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                               DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END()
-
-TEST_SUITE(U8_to_S16)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                   DepthConvertShiftDataset),
-               shape, policy, shift)
-{
-    int fixed_point_position = 0;
-
-    // Create tensors
-    CLTensor src = create_tensor<CLTensor>(shape, DataType::U8, 1, fixed_point_position);
-    CLTensor dst = create_tensor<CLTensor>(shape, DataType::S16, 1, fixed_point_position);
-
-    // Create and Configure function
-    CLDepthConvert depth_convert;
-    depth_convert.configure(&src, &dst, policy, shift);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertToS16Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU8toS16Dataset),
-                                                                                                                 framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                 DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertToS16Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU8toS16Dataset),
-                                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                               DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END()
-TEST_SUITE(U8_to_S32)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                   DepthConvertShiftDataset),
-               shape, policy, shift)
-{
-    int fixed_point_position = 0;
-
-    // Create tensors
-    CLTensor src = create_tensor<CLTensor>(shape, DataType::U8, 1, fixed_point_position);
-    CLTensor dst = create_tensor<CLTensor>(shape, DataType::S32, 1, fixed_point_position);
-
-    // Create and Configure function
-    CLDepthConvert depth_convert;
-    depth_convert.configure(&src, &dst, policy, shift);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertToS32Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU8toS32Dataset),
-                                                                                                                 framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                 DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertToS32Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU8toS32Dataset),
-                                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                               DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END()
-
-TEST_SUITE(U16_to_U8)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                   DepthConvertShiftDataset),
-               shape, policy, shift)
-{
-    int fixed_point_position = 0;
-
-    // Create tensors
-    CLTensor src = create_tensor<CLTensor>(shape, DataType::U16, 1, fixed_point_position);
-    CLTensor dst = create_tensor<CLTensor>(shape, DataType::U8, 1, fixed_point_position);
-
-    // Create and Configure function
-    CLDepthConvert depth_convert;
-    depth_convert.configure(&src, &dst, policy, shift);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertToU8Fixture<uint16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU16toU8Dataset),
-                                                                                                                 framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                 DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertToU8Fixture<uint16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU16toU8Dataset),
-                                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                               DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END()
-
-TEST_SUITE(U16_to_U32)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                   DepthConvertShiftDataset),
-               shape, policy, shift)
-{
-    int fixed_point_position = 0;
-
-    // Create tensors
-    CLTensor src = create_tensor<CLTensor>(shape, DataType::U16, 1, fixed_point_position);
-    CLTensor dst = create_tensor<CLTensor>(shape, DataType::U32, 1, fixed_point_position);
-
-    // Create and Configure function
-    CLDepthConvert depth_convert;
-    depth_convert.configure(&src, &dst, policy, shift);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertToU32Fixture<uint16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU16toU32Dataset),
-                                                                                                                  framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                  DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertToU32Fixture<uint16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU16toU32Dataset),
-                                                                                                                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END()
-
-TEST_SUITE(S16_to_U8)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                   DepthConvertShiftDataset),
-               shape, policy, shift)
-{
-    int fixed_point_position = 0;
-
-    // Create tensors
-    CLTensor src = create_tensor<CLTensor>(shape, DataType::S16, 1, fixed_point_position);
-    CLTensor dst = create_tensor<CLTensor>(shape, DataType::U8, 1, fixed_point_position);
-
-    // Create and Configure function
-    CLDepthConvert depth_convert;
-    depth_convert.configure(&src, &dst, policy, shift);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertToU8Fixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertS16toU8Dataset),
-                                                                                                                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertToU8Fixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertS16toU8Dataset),
-                                                                                                                      framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                              DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END()
-
-TEST_SUITE(S16_to_S32)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                   DepthConvertShiftDataset),
-               shape, policy, shift)
-{
-    int fixed_point_position = 0;
-
-    // Create tensors
-    CLTensor src = create_tensor<CLTensor>(shape, DataType::S16, 1, fixed_point_position);
-    CLTensor dst = create_tensor<CLTensor>(shape, DataType::S32, 1, fixed_point_position);
-
-    // Create and Configure function
-    CLDepthConvert depth_convert;
-    depth_convert.configure(&src, &dst, policy, shift);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertToS32Fixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertS16toS32Dataset),
-                                                                                                                 framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                 DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertToS32Fixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertS16toS32Dataset),
-                                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                               DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END()
-
-TEST_SUITE(Quantized_to_FP32)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })),
-                                                                           framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                   DepthConvertFixedPointQuantizedDataset),
-               shape, dt, policy, fixed_point_position)
-{
-    int shift = 0;
-
-    // Create tensors
-    CLTensor src = create_tensor<CLTensor>(shape, dt, 1, fixed_point_position);
-    CLTensor dst = create_tensor<CLTensor>(shape, DataType::F32, 1, fixed_point_position);
-
-    // Create and Configure function
-    CLDepthConvert depth_convert;
-    depth_convert.configure(&src, &dst, policy, shift);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-FIXTURE_DATA_TEST_CASE(RunSmallQS8, CLDepthConvertToFP32FixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertQS8toFP32Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                       DepthConvertFixedPointQuantizedDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunSmallQS16, CLDepthConvertToFP32FixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertQS16toFP32Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                       DepthConvertFixedPointQuantizedDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLargeQS8, CLDepthConvertToFP32FixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertQS8toFP32Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                       DepthConvertFixedPointQuantizedDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLargeQS16, CLDepthConvertToFP32FixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertQS16toFP32Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                       DepthConvertFixedPointQuantizedDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END()
-
-TEST_SUITE(FP32_to_Quantized)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })),
-                                                                           framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                   DepthConvertFixedPointQuantizedDataset),
-               shape, dt, policy, fixed_point_position)
-{
-    int shift = 0;
-
-    // Create tensors
-    CLTensor src = create_tensor<CLTensor>(shape, DataType::F32, 1, fixed_point_position);
-    CLTensor dst = create_tensor<CLTensor>(shape, dt, 1, fixed_point_position);
-
-    // Create and Configure function
-    CLDepthConvert depth_convert;
-    depth_convert.configure(&src, &dst, policy, shift);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-FIXTURE_DATA_TEST_CASE(RunSmallQS8, CLDepthConvertToQS8FixedPointFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertFP32toQS8Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                       DepthConvertFixedPointQuantizedDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunSmallQS16, CLDepthConvertToQS16FixedPointFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertFP32toQS16Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                       DepthConvertFixedPointQuantizedDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLargeQS8, CLDepthConvertToQS8FixedPointFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertFP32toQS8Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                       DepthConvertFixedPointQuantizedDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLargeQS16, CLDepthConvertToQS16FixedPointFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertFP32toQS16Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                       DepthConvertFixedPointQuantizedDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END()
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/DepthConvertLayer.cpp b/tests/validation/CL/DepthConvertLayer.cpp
new file mode 100644
index 0000000..9c6cc46
--- /dev/null
+++ b/tests/validation/CL/DepthConvertLayer.cpp
@@ -0,0 +1,492 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ConvertPolicyDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DepthConvertLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Input data sets **/
+const auto DepthConvertLayerU8toU16Dataset             = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16));
+const auto DepthConvertLayerU8toS16Dataset             = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16));
+const auto DepthConvertLayerU8toS32Dataset             = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32));
+const auto DepthConvertLayerU16toU8Dataset             = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8));
+const auto DepthConvertLayerU16toU32Dataset            = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32));
+const auto DepthConvertLayerS16toU8Dataset             = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8));
+const auto DepthConvertLayerS16toS32Dataset            = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32));
+const auto DepthConvertLayerQS8toFP32Dataset           = combine(framework::dataset::make("DataType", DataType::QS8), framework::dataset::make("DataType", DataType::F32));
+const auto DepthConvertLayerQS16toFP32Dataset          = combine(framework::dataset::make("DataType", DataType::QS16), framework::dataset::make("DataType", DataType::F32));
+const auto DepthConvertLayerFP32toQS8Dataset           = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS8));
+const auto DepthConvertLayerFP32toQS16Dataset          = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS16));
+const auto DepthConvertLayerShiftDataset               = framework::dataset::make("Shift", 0, 7);
+const auto DepthConvertLayerFixedPointQuantizedDataset = framework::dataset::make("FractionalBits", 1, 7);
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(DepthConvertLayer)
+template <typename T>
+using CLDepthConvertLayerToU16Fixture = DepthConvertLayerValidationFixture<CLTensor, CLAccessor, CLDepthConvertLayer, T, uint16_t>;
+template <typename T>
+using CLDepthConvertLayerToS16Fixture = DepthConvertLayerValidationFixture<CLTensor, CLAccessor, CLDepthConvertLayer, T, int16_t>;
+template <typename T>
+using CLDepthConvertLayerToS32Fixture = DepthConvertLayerValidationFixture<CLTensor, CLAccessor, CLDepthConvertLayer, T, int32_t>;
+template <typename T>
+using CLDepthConvertLayerToU8Fixture = DepthConvertLayerValidationFixture<CLTensor, CLAccessor, CLDepthConvertLayer, T, uint8_t>;
+template <typename T>
+using CLDepthConvertLayerToU32Fixture = DepthConvertLayerValidationFixture<CLTensor, CLAccessor, CLDepthConvertLayer, T, uint32_t>;
+template <typename T>
+using CLDepthConvertLayerToFP32FixedPointFixture = DepthConvertLayerValidationFractionalBitsFixture<CLTensor, CLAccessor, CLDepthConvertLayer, T, float>;
+template <typename T>
+using CLDepthConvertLayerToQS8FixedPointFixture = DepthConvertLayerValidationFractionalBitsFixture<CLTensor, CLAccessor, CLDepthConvertLayer, T, int8_t>;
+template <typename T>
+using CLDepthConvertLayerToQS16FixedPointFixture = DepthConvertLayerValidationFractionalBitsFixture<CLTensor, CLAccessor, CLDepthConvertLayer, T, int16_t>;
+
+TEST_SUITE(U8_to_U16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::U8, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::U16, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToU16Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toU16Dataset),
+                                                                                                                      framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                      DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToU16Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toU16Dataset),
+                                                                                                                    framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                    DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(U8_to_S16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::U8, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::S16, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToS16Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toS16Dataset),
+                                                                                                                      framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                      DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToS16Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toS16Dataset),
+                                                                                                                    framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                    DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+TEST_SUITE(U8_to_S32)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::U8, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::S32, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToS32Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toS32Dataset),
+                                                                                                                      framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                      DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToS32Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toS32Dataset),
+                                                                                                                    framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                    DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(U16_to_U8)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::U16, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::U8, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToU8Fixture<uint16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU16toU8Dataset),
+                                                                                                                      framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                      DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToU8Fixture<uint16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU16toU8Dataset),
+                                                                                                                    framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                    DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(U16_to_U32)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::U16, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::U32, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToU32Fixture<uint16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU16toU32Dataset),
+                                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToU32Fixture<uint16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU16toU32Dataset),
+                                                                                                                     framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                     DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(S16_to_U8)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::S16, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::U8, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToU8Fixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerS16toU8Dataset),
+                                                                                                                     framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                     DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToU8Fixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS16toU8Dataset),
+                                                                                                                   framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                   DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(S16_to_S32)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::S16, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::S32, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToS32Fixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerS16toS32Dataset),
+                                                                                                                      framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                      DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToS32Fixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS16toS32Dataset),
+                                                                                                                    framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                    DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(Quantized_to_FP32)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })),
+                                                                           framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerFixedPointQuantizedDataset),
+               shape, dt, policy, fixed_point_position)
+{
+    int shift = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, dt, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::F32, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallQS8, CLDepthConvertLayerToFP32FixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+                       DepthConvertLayerQS8toFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallQS16, CLDepthConvertLayerToFP32FixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+                       DepthConvertLayerQS16toFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeQS8, CLDepthConvertLayerToFP32FixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+                       DepthConvertLayerQS8toFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeQS16, CLDepthConvertLayerToFP32FixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+                       DepthConvertLayerQS16toFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(FP32_to_Quantized)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })),
+                                                                           framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerFixedPointQuantizedDataset),
+               shape, dt, policy, fixed_point_position)
+{
+    int shift = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::F32, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, dt, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallQS8, CLDepthConvertLayerToQS8FixedPointFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+                       DepthConvertLayerFP32toQS8Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallQS16, CLDepthConvertLayerToQS16FixedPointFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+                       DepthConvertLayerFP32toQS16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeQS8, CLDepthConvertLayerToQS8FixedPointFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+                       DepthConvertLayerFP32toQS8Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeQS16, CLDepthConvertLayerToQS16FixedPointFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+                       DepthConvertLayerFP32toQS16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/DepthwiseConvolution.cpp b/tests/validation/CL/DepthwiseConvolution.cpp
deleted file mode 100644
index 5f1bde8..0000000
--- a/tests/validation/CL/DepthwiseConvolution.cpp
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/DepthwiseConvolutionDataset.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/DepthwiseConvolutionFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr RelativeTolerance<float> tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(DepthwiseConvolutionLayer)
-
-template <typename T>
-using CLDepthwiseConvolutionFixture = DepthwiseConvolutionValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolution, T>;
-
-TEST_SUITE(Generic)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionFixture<float>, framework::DatasetMode::PRECOMMIT, datasets::SmallDepthwiseConvolutionDataset())
-{
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionFixture<float>, framework::DatasetMode::NIGHTLY, datasets::LargeDepthwiseConvolutionDataset())
-{
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-TEST_SUITE_END()
-
-template <typename T>
-using CLDepthwiseConvolutionFixture3x3 = DepthwiseConvolutionValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolution3x3, T>;
-
-TEST_SUITE(W3x3)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionFixture3x3<float>, framework::DatasetMode::PRECOMMIT, datasets::SmallDepthwiseConvolutionDataset3x3())
-{
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionFixture3x3<float>, framework::DatasetMode::NIGHTLY, datasets::LargeDepthwiseConvolutionDataset3x3())
-{
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-TEST_SUITE_END()
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
new file mode 100644
index 0000000..92a2773
--- /dev/null
+++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/DepthwiseConvolutionLayerDataset.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr RelativeTolerance<float>   tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
+constexpr RelativeTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(DepthwiseConvolutionLayer)
+
+template <typename T>
+using CLDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T>;
+
+TEST_SUITE(Generic)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionLayerDataset(), framework::dataset::make("DataType",
+                                                                                                                 DataType::F32)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+                                                                                                                     framework::dataset::make("DataType",
+                                                                                                                             DataType::F32)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END()
+
+template <typename T>
+using CLDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer3x3, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                                                                                                    framework::dataset::make("DataType",
+                                                                                                                            DataType::F32)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+                                                                                                                        framework::dataset::make("DataType",
+                                                                                                                                DataType::F32)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+template <typename T>
+using CLDepthwiseConvolutionLayerQuantizedFixture3x3 = DepthwiseConvolutionLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer3x3, T>;
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127) })))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127) })))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/Dilate.cpp b/tests/validation/CL/Dilate.cpp
new file mode 100644
index 0000000..185150b
--- /dev/null
+++ b/tests/validation/CL/Dilate.cpp
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLDilate.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DilateFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr unsigned int filter_size = 3;              /* Size of the kernel/filter in number of elements. */
+constexpr BorderSize   border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(Dilate)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::U8)),
+                                                                   datasets::BorderModes()),
+               shape, data_type, border_mode)
+{
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, data_type);
+    CLTensor dst = create_tensor<CLTensor>(shape, data_type);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    CLDilate dilate;
+    dilate.configure(&src, &dst, border_mode);
+
+    // Validate valid region
+    const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), border_size);
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding
+    PaddingCalculator calculator(shape.x(), 8);
+    calculator.set_border_size(1);
+    calculator.set_border_mode(border_mode);
+
+    const PaddingSize dst_padding = calculator.required_padding();
+
+    calculator.set_accessed_elements(16);
+    calculator.set_access_offset(-1);
+
+    const PaddingSize src_padding = calculator.required_padding();
+
+    validate(src.info()->padding(), src_padding);
+    validate(dst.info()->padding(), dst_padding);
+}
+
+template <typename T>
+using CLDilateFixture = DilateValidationFixture<CLTensor, CLAccessor, CLDilate, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDilateFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                              DataType::U8)),
+                                                                                                      datasets::BorderModes()))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDilateFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                            DataType::U8)),
+                                                                                                    datasets::BorderModes()))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/DirectConvolutionLayer.cpp b/tests/validation/CL/DirectConvolutionLayer.cpp
index df42d0a..94e9b95 100644
--- a/tests/validation/CL/DirectConvolutionLayer.cpp
+++ b/tests/validation/CL/DirectConvolutionLayer.cpp
@@ -27,6 +27,7 @@
 #include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/PaddingCalculator.h"
+#include "tests/datasets/DirectConvolutionLayerDataset.h"
 #include "tests/datasets/ShapeDatasets.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
@@ -47,21 +48,11 @@
 RelativeTolerance<float> tolerance_fp32(0.02f);     /**< Tolerance for floating point tests */
 constexpr float          tolerance_num = 0.07f;     /**< Tolerance number */
 
-constexpr AbsoluteTolerance<int8_t>  tolerance_qs8(0);  /**< Tolerance for fixed point tests */
-constexpr AbsoluteTolerance<int16_t> tolerance_qs16(0); /**< Tolerance for fixed point tests */
+constexpr AbsoluteTolerance<int8_t>  tolerance_qs8(0);     /**< Tolerance for fixed point tests */
+constexpr AbsoluteTolerance<int16_t> tolerance_qs16(0);    /**< Tolerance for fixed point tests */
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance for quantized tests */
 
 /** Direct convolution data set. */
-const auto data_quantized = combine(datasets::SmallDirectConvolutionShapes(),
-                                    combine(framework::dataset::make("StrideX", 1, 3),
-                                            combine(framework::dataset::make("StrideY", 1, 3),
-                                                    combine(concat(combine(framework::dataset::make("PadX", 0),
-                                                                           combine(framework::dataset::make("PadY", 0),
-                                                                                   framework::dataset::make("KernelSize", 1))),
-                                                                   combine(framework::dataset::make("PadX", 0, 2),
-                                                                           combine(framework::dataset::make("PadY", 0, 2),
-                                                                                   framework::dataset::make("KernelSize", { 3 })))),
-                                                            framework::dataset::make("NumKernels", { 1, 4, 8, 16 })))));
-
 const auto data = combine(datasets::SmallDirectConvolutionShapes(),
                           combine(framework::dataset::make("StrideX", 1, 3),
                                   combine(framework::dataset::make("StrideY", 1, 3),
@@ -72,13 +63,97 @@
                                                                  combine(framework::dataset::make("PadY", 0, 2),
                                                                          framework::dataset::make("KernelSize", { 3, 5 })))),
                                                   framework::dataset::make("NumKernels", { 1, 4, 8, 16 })))));
+const auto data_fixed_point = combine(datasets::SmallDirectConvolutionShapes(),
+                                      combine(framework::dataset::make("StrideX", 1, 3),
+                                              combine(framework::dataset::make("StrideY", 1, 3),
+                                                      combine(concat(combine(framework::dataset::make("PadX", 0),
+                                                                             combine(framework::dataset::make("PadY", 0),
+                                                                                     framework::dataset::make("KernelSize", 1))),
+                                                                     combine(framework::dataset::make("PadX", 0, 2),
+                                                                             combine(framework::dataset::make("PadY", 0, 2),
+                                                                                     framework::dataset::make("KernelSize", { 3 })))),
+                                                              framework::dataset::make("NumKernels", { 1, 4, 8, 16 })))));
 } // namespace
 
 TEST_SUITE(CL)
 TEST_SUITE(DirectConvolutionLayer)
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching data type input/weights
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching input feature maps
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Unsupported kernel width
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Non-rectangular weights dimensions
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid weights dimensions
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid stride
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid biases size
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid biases dimensions
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid output size
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Window shrink
+                                                       TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32, 0),
+                                                     }),
+               framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F16, 0),
+                                                        TensorInfo(TensorShape(3U, 3U, 3U, 4U), 1, DataType::F32, 0),
+                                                        TensorInfo(TensorShape(9U, 9U, 2U, 4U), 1, DataType::F32, 0),
+                                                        TensorInfo(TensorShape(5U, 3U, 2U, 4U), 1, DataType::F32, 0),
+                                                        TensorInfo(TensorShape(3U, 3U, 2U, 4U, 3U), 1, DataType::F32, 0),
+                                                        TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
+                                                        TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
+                                                        TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
+                                                        TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
+                                                        TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
+                                                        TensorInfo(TensorShape(1U, 1U, 2U, 4U), 1, DataType::F32, 0),
+                                                     })),
+               framework::dataset::make("BiasesInfo",{ TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(3U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(4U, 2U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                                     })),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(26U, 11U, 4U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(32U, 16U, 4U), 1, DataType::F32, 0),
+                                                     })),
+               framework::dataset::make("ConvInfo",  { PadStrideInfo(1, 1, 0, 0),
+                                                       PadStrideInfo(1, 1, 0, 0),
+                                                       PadStrideInfo(1, 1, 0, 0),
+                                                       PadStrideInfo(1, 1, 0, 0),
+                                                       PadStrideInfo(1, 1, 0, 0),
+                                                       PadStrideInfo(3, 3, 0, 0),
+                                                       PadStrideInfo(1, 1, 0, 0),
+                                                       PadStrideInfo(1, 1, 0, 0),
+                                                       PadStrideInfo(1, 1, 0, 0),
+                                                       PadStrideInfo(1, 1, 0, 0),
+                                                       PadStrideInfo(1, 1, 0, 0),
+                                                      })),
+               framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false, false, true })),
+               input_info, weights_info, biases_info, output_info, conv_info, expected)
+{
+    bool is_valid = bool(CLDirectConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info));
+    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
 template <typename T>
 using CLDirectConvolutionLayerFixture = DirectConvolutionValidationFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T>;
+template <typename T>
+using CLDirectConvolutionValidationWithTensorShapesFixture = DirectConvolutionValidationWithTensorShapesFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T>;
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
@@ -96,14 +171,23 @@
     validate(CLAccessor(_target), _reference, tolerance_fp32);
 }
 TEST_SUITE_END()
+
+TEST_SUITE(FP32_CustomDataset)
+FIXTURE_DATA_TEST_CASE(Run, CLDirectConvolutionValidationWithTensorShapesFixture<float>, framework::DatasetMode::ALL, combine(datasets::DirectConvolutionLayerDataset(),
+                       framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END()
 TEST_SUITE_END()
 
 template <typename T>
 using CLDirectConvolutionLayerFixedPointFixture = DirectConvolutionValidationFixedPointFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T>;
 
-TEST_SUITE(Quantized)
+TEST_SUITE(FixedPoint)
 TEST_SUITE(QS8)
-FIXTURE_DATA_TEST_CASE(Run, CLDirectConvolutionLayerFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(data_quantized, framework::dataset::make("DataType", DataType::QS8)),
+FIXTURE_DATA_TEST_CASE(Run, CLDirectConvolutionLayerFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(data_fixed_point, framework::dataset::make("DataType", DataType::QS8)),
                                                                                                                     framework::dataset::make("FractionalBits", 2, 7)))
 {
     // Validate output
@@ -112,7 +196,7 @@
 TEST_SUITE_END()
 
 TEST_SUITE(QS16)
-FIXTURE_DATA_TEST_CASE(Run, CLDirectConvolutionLayerFixedPointFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(data_quantized, framework::dataset::make("DataType", DataType::QS16)),
+FIXTURE_DATA_TEST_CASE(Run, CLDirectConvolutionLayerFixedPointFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(data_fixed_point, framework::dataset::make("DataType", DataType::QS16)),
                                                                                                                      framework::dataset::make("FractionalBits", 2, 15)))
 {
     // Validate output
@@ -121,6 +205,32 @@
 TEST_SUITE_END()
 TEST_SUITE_END()
 
+template <typename T>
+using CLDirectConvolutionLayerQuantizedFixture = DirectConvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T>;
+template <typename T>
+using CLDirectConvolutionValidationWithTensorShapesQuantizedFixture = DirectConvolutionValidationWithTensorShapesQuantizedFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T>;
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(Run, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(data, framework::dataset::make("DataType", DataType::QASYMM8)),
+                                                                                                                    framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(QASYMM8_CustomDataset)
+FIXTURE_DATA_TEST_CASE(Run, CLDirectConvolutionValidationWithTensorShapesQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(datasets::DirectConvolutionLayerDataset(),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
 TEST_SUITE_END()
 TEST_SUITE_END()
 } // namespace validation
diff --git a/tests/validation/CL/Erode.cpp b/tests/validation/CL/Erode.cpp
new file mode 100644
index 0000000..dce72d7
--- /dev/null
+++ b/tests/validation/CL/Erode.cpp
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLErode.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/ErodeFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr unsigned int filter_size = 3;              /* Size of the kernel/filter in number of elements. */
+constexpr BorderSize   border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(Erode)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::U8)),
+                                                                   datasets::BorderModes()),
+               shape, data_type, border_mode)
+{
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, data_type);
+    CLTensor dst = create_tensor<CLTensor>(shape, data_type);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    CLErode erode;
+    erode.configure(&src, &dst, border_mode);
+
+    // Validate valid region
+    const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), border_size);
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding
+    PaddingCalculator calculator(shape.x(), 8);
+    calculator.set_border_size(1);
+    calculator.set_border_mode(border_mode);
+
+    const PaddingSize dst_padding = calculator.required_padding();
+
+    calculator.set_accessed_elements(16);
+    calculator.set_access_offset(-1);
+
+    const PaddingSize src_padding = calculator.required_padding();
+
+    validate(src.info()->padding(), src_padding);
+    validate(dst.info()->padding(), dst_padding);
+}
+
+template <typename T>
+using CLErodeFixture = ErodeValidationFixture<CLTensor, CLAccessor, CLErode, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLErodeFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                             DataType::U8)),
+                                                                                                     datasets::BorderModes()))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLErodeFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                           DataType::U8)),
+                                                                                                   datasets::BorderModes()))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/FixedPoint/FixedPoint_QS8.cpp b/tests/validation/CL/FixedPoint/FixedPoint_QS8.cpp
index 0fb4641..186fc4c 100644
--- a/tests/validation/CL/FixedPoint/FixedPoint_QS8.cpp
+++ b/tests/validation/CL/FixedPoint/FixedPoint_QS8.cpp
@@ -56,7 +56,7 @@
 using CLFixedPointFixture = FixedPointValidationFixture<CLTensor, CLAccessor, T>;
 
 TEST_SUITE(Exp)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small1DShape(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small1DShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::QS8)),
                                                                                                            framework::dataset::make("FixedPointOp", FixedPointOp::EXP)),
                                                                                                    framework::dataset::make("FractionalBits", 1, 6)))
@@ -67,7 +67,7 @@
 TEST_SUITE_END()
 
 TEST_SUITE(Log)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small1DShape(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small1DShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::QS8)),
                                                                                                            framework::dataset::make("FixedPointOp", FixedPointOp::LOG)),
                                                                                                    framework::dataset::make("FractionalBits", 3, 6)))
@@ -78,7 +78,7 @@
 TEST_SUITE_END()
 
 TEST_SUITE(Invsqrt)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small1DShape(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small1DShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::QS8)),
                                                                                                            framework::dataset::make("FixedPointOp", FixedPointOp::INV_SQRT)),
                                                                                                    framework::dataset::make("FractionalBits", 1, 6)))
diff --git a/tests/validation/CL/FullyConnectedLayer.cpp b/tests/validation/CL/FullyConnectedLayer.cpp
index 35b9d29..0d8c877 100644
--- a/tests/validation/CL/FullyConnectedLayer.cpp
+++ b/tests/validation/CL/FullyConnectedLayer.cpp
@@ -49,6 +49,8 @@
 
 /** Tolerance for fixed point operations */
 constexpr AbsoluteTolerance<float> tolerance_fixed_point(1.f);
+/** Tolerance for quantized asymmetric operations */
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);
 
 /** CNN data types */
 const auto CNNDataTypes = framework::dataset::make("DataType",
@@ -57,6 +59,7 @@
     DataType::F32,
     DataType::QS8,
     DataType::QS16,
+    DataType::QASYMM8,
 });
 
 const auto FullyConnectedParameters = combine(framework::dataset::make("TransposeWeights", { false, true }), framework::dataset::make("ReshapeWeights", { false, true }));
@@ -71,7 +74,9 @@
                src_shape, weights_shape, bias_shape, dst_shape, transpose_weights, reshape_weights, data_type)
 {
     // Set fixed point position data type allowed
-    int fixed_point_position = is_data_type_fixed_point(data_type) ? 3 : 0;
+    const int              fixed_point_position = is_data_type_fixed_point(data_type) ? 3 : 0;
+    const DataType         bias_data_type       = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
+    const QuantizationInfo quantization_info    = is_data_type_quantized_asymmetric(data_type) ? QuantizationInfo(2.f / 255.f, 127) : QuantizationInfo();
 
     TensorShape ws(weights_shape);
 
@@ -84,16 +89,19 @@
     }
 
     // Create tensors
-    CLTensor src     = create_tensor<CLTensor>(src_shape, data_type, 1, fixed_point_position);
-    CLTensor weights = create_tensor<CLTensor>(ws, data_type, 1, fixed_point_position);
-    CLTensor bias    = create_tensor<CLTensor>(bias_shape, data_type, 1, fixed_point_position);
-    CLTensor dst     = create_tensor<CLTensor>(dst_shape, data_type, 1, fixed_point_position);
+    CLTensor src     = create_tensor<CLTensor>(src_shape, data_type, 1, fixed_point_position, quantization_info);
+    CLTensor weights = create_tensor<CLTensor>(ws, data_type, 1, fixed_point_position, quantization_info);
+    CLTensor bias    = create_tensor<CLTensor>(bias_shape, bias_data_type, 1, fixed_point_position, quantization_info);
+    CLTensor dst     = create_tensor<CLTensor>(dst_shape, data_type, 1, fixed_point_position, quantization_info);
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
+    const QuantizationInfo src_quantization_info     = src.info()->quantization_info();
+    const QuantizationInfo weights_quantization_info = weights.info()->quantization_info();
+
     // Create and configure function.
     CLFullyConnectedLayer fc;
     fc.configure(&src, &weights, &bias, &dst, transpose_weights, !reshape_weights);
@@ -101,6 +109,10 @@
     // Validate valid region
     const ValidRegion dst_valid_region = shape_to_valid_region(dst_shape);
     validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate QuantizationInfo
+    ARM_COMPUTE_EXPECT(src.info()->quantization_info() == src_quantization_info, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(weights.info()->quantization_info() == weights_quantization_info, framework::LogLevel::ERRORS);
 }
 
 template <typename T>
@@ -143,7 +155,7 @@
 template <typename T>
 using CLFullyConnectedLayerFixedPointFixture = FullyConnectedLayerValidationFixedPointFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T, false>;
 
-TEST_SUITE(Quantized)
+TEST_SUITE(FixedPoint)
 TEST_SUITE(QS8)
 // Testing for fixed point position [1,6) as reciprocal limits the maximum fixed point position to 5
 FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(),
@@ -189,6 +201,32 @@
 TEST_SUITE_END()
 TEST_SUITE_END()
 
+template <typename T>
+using CLFullyConnectedLayerQuantizedFixture = FullyConnectedLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T, false>;
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(
+                           combine(datasets::SmallFullyConnectedLayerDataset(),
+                                   FullyConnectedParameters),
+                           framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 10) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(
+                           combine(datasets::LargeFullyConnectedLayerDataset(),
+                                   FullyConnectedParameters),
+                           framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 256.f, 10) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
 TEST_SUITE_END()
 TEST_SUITE_END()
 } // namespace validation
diff --git a/tests/validation/CL/GEMMLowp.cpp b/tests/validation/CL/GEMMLowp.cpp
new file mode 100644
index 0000000..5148a31
--- /dev/null
+++ b/tests/validation/CL/GEMMLowp.cpp
@@ -0,0 +1,259 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/LargeGEMMLowpDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/datasets/SmallGEMMLowpDataset.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/GEMMLowpFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(GEMMLowp)
+
+TEST_SUITE(MatrixMultiplyCore)
+using CLGEMMLowpMatrixMultiplyCoreFixture = GEMMLowpMatrixMultiplyCoreValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore>;
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, framework::dataset::concat(datasets::SmallGEMMLowpDataset(), datasets::LargeGEMMLowpDataset()),
+               shape_a, shape_b, shape_c, a_offset, b_offset)
+{
+    // Create tensors
+    CLTensor a = create_tensor<CLTensor>(shape_a, DataType::QASYMM8);
+    CLTensor b = create_tensor<CLTensor>(shape_b, DataType::QASYMM8);
+    CLTensor c = create_tensor<CLTensor>(shape_c, DataType::S32);
+
+    a.info()->set_quantization_info(QuantizationInfo(1.0f / 255, a_offset));
+    b.info()->set_quantization_info(QuantizationInfo(1.0f / 255, b_offset));
+
+    ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    CLGEMMLowpMatrixMultiplyCore gemmlowp_mm;
+    gemmlowp_mm.configure(&a, &b, &c);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset())
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpDataset())
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+TEST_SUITE_END() // MatrixMultiplyCore
+
+TEST_SUITE(OutputStage)
+TEST_SUITE(QuantizeDownInt32ToUint8Scale)
+
+const auto quantize_down_int32_to_uint8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2,
+                                                      3)
+                                                      * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+
+const auto quantize_down_int32_to_uint8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1,
+                                                           2)
+                                                           * framework::dataset::make("result_shift", 2, 3) * framework::dataset::make("min", 0, 2) * framework::dataset::make("max", 171, 173) * framework::dataset::make("addBias", { false, true });
+
+using CLGEMMLowpQuantizeDownInt32ToUint8ScaleFixture = GEMMLowpQuantizeDownInt32ToUint8ScaleValidationFixture<CLTensor, CLAccessor, CLGEMMLowpQuantizeDownInt32ToUint8Scale>;
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), quantize_down_int32_to_uint8_scale_cases),
+               shape, result_offset, result_mult_int, result_shift, min, max, add_bias)
+{
+    TensorShape shape_bias(shape[0]);
+
+    // Create tensors
+    CLTensor in   = create_tensor<CLTensor>(shape, DataType::S32);
+    CLTensor bias = create_tensor<CLTensor>(shape_bias, DataType::S32);
+    CLTensor out  = create_tensor<CLTensor>(shape, DataType::QASYMM8);
+
+    ARM_COMPUTE_EXPECT(in.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(out.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    CLGEMMLowpQuantizeDownInt32ToUint8Scale output_stage;
+    output_stage.configure(&in, add_bias ? &bias : nullptr, &out, result_offset, result_mult_int, result_shift, min, max);
+
+    // Validate valid region input and output
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(in.info()->valid_region(), valid_region);
+    validate(out.info()->valid_region(), valid_region);
+
+    // Validate valid region bias
+    if(add_bias)
+    {
+        const ValidRegion valid_region_bias = shape_to_valid_region(shape_bias);
+        validate(bias.info()->valid_region(), valid_region_bias);
+    }
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(in.info()->padding(), padding);
+    validate(out.info()->padding(), padding);
+
+    if(add_bias)
+    {
+        validate(bias.info()->padding(), padding);
+    }
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToUint8ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_cases))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpQuantizeDownInt32ToUint8ScaleFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), quantize_down_int32_to_uint8_scale_cases))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+TEST_SUITE(BoundedReLu)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToUint8ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_relu_cases))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpQuantizeDownInt32ToUint8ScaleFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(),
+                       quantize_down_int32_to_uint8_scale_relu_cases))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // BoundedReLu
+TEST_SUITE_END() // QuantizeDownInt32ToUint8Scale
+
+TEST_SUITE(QuantizeDownInt32ToUint8ScaleByFixedPoint)
+
+const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
+                                                                    2)
+                                                                    * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 0) * framework::dataset::make("addBias", { false, true });
+
+const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
+                                                                         2)
+                                                                         * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0, 2) * framework::dataset::make("max", 171, 174) * framework::dataset::make("addBias", { false, true });
+
+using CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture =
+    GEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointValidationFixture<CLTensor, CLAccessor, CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint>;
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()),
+                                                                   quantize_down_int32_to_uint8_scale_by_fixedpoint_cases),
+               shape, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max, add_bias)
+{
+    TensorShape shape_bias(shape[0]);
+
+    // Create tensors
+    CLTensor in   = create_tensor<CLTensor>(shape, DataType::S32);
+    CLTensor bias = create_tensor<CLTensor>(shape_bias, DataType::S32);
+    CLTensor out  = create_tensor<CLTensor>(shape, DataType::QASYMM8);
+
+    ARM_COMPUTE_EXPECT(in.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(out.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint output_stage;
+    output_stage.configure(&in, add_bias ? &bias : nullptr, &out, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
+
+    // Validate valid region input and output
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(in.info()->valid_region(), valid_region);
+    validate(out.info()->valid_region(), valid_region);
+
+    // Validate valid region bias
+    if(add_bias)
+    {
+        const ValidRegion valid_region_bias = shape_to_valid_region(shape_bias);
+        validate(bias.info()->valid_region(), valid_region_bias);
+    }
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(in.info()->padding(), padding);
+    validate(out.info()->padding(), padding);
+
+    if(add_bias)
+    {
+        validate(bias.info()->padding(), padding);
+    }
+}
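The padding assertions above follow from the 16 elements assumed to be processed per iteration: with no border, the x-dimension only needs to be padded up to the next multiple of the step. A rough sketch of that arithmetic (illustrative, not the framework's PaddingCalculator):

#include <cstddef>

// Right padding needed so whole 16-element iterations cover the row (illustrative only).
inline std::size_t required_right_padding_sketch(std::size_t width, std::size_t step = 16)
{
    const std::size_t rounded_up = ((width + step - 1) / step) * step; // ceil width to a multiple of step
    return rounded_up - width;                                         // e.g. width 27 -> 32 - 27 = 5
}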
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
+                       quantize_down_int32_to_uint8_scale_by_fixedpoint_cases))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(),
+                       quantize_down_int32_to_uint8_scale_by_fixedpoint_cases))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+TEST_SUITE(BoundedReLu)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
+                       quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(),
+                       quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // BoundedReLu
+TEST_SUITE_END() // QuantizeDownInt32ToUint8ScaleByFixedPoint
+
+TEST_SUITE_END() // OutputStage
+TEST_SUITE_END() // GEMMLowp
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
\ No newline at end of file
diff --git a/tests/validation/CL/Histogram.cpp b/tests/validation/CL/Histogram.cpp
new file mode 100644
index 0000000..318bc1e
--- /dev/null
+++ b/tests/validation/CL/Histogram.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLDistribution1D.h"
+#include "arm_compute/runtime/CL/functions/CLHistogram.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/HistogramFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(Histogram)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()),
+                                                                   framework::dataset::make("DataType", DataType::U8)),
+               shape, data_type)
+{
+    // Set up the output distribution
+    std::mt19937                            gen(library->seed());
+    std::uniform_int_distribution<size_t>   distribution_size_t(1, 30);
+    const size_t                            num_bins = distribution_size_t(gen);
+    std::uniform_int_distribution<int32_t>  distribution_int32_t(0, 125);
+    const size_t                            offset = distribution_int32_t(gen);
+    std::uniform_int_distribution<uint32_t> distribution_uint32_t(1, 255 - offset);
+    const size_t                            range = distribution_uint32_t(gen);
+    CLDistribution1D                        distribution_dst(num_bins, offset, range);
+
+    // Create tensors
+    CLTensor    src = create_tensor<CLTensor>(shape, data_type);
+    TensorShape dst_shape(num_bins);
+    CLTensor    dst = create_tensor<CLTensor>(dst_shape, DataType::U32);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    CLHistogram histogram;
+    histogram.configure(&src, &distribution_dst);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(src.info()->valid_region(), valid_region);
+    const ValidRegion valid_region_dst = shape_to_valid_region(dst_shape);
+    validate(dst.info()->valid_region(), valid_region_dst);
+
+    // Validate padding
+    const PaddingSize padding;
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
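The distribution set up above covers num_bins bins over the half-open window [offset, offset + range). As a point of reference, a sketch of how a single pixel value would map to a bin under those parameters (assumed reference behaviour, illustrative name):

#include <cstddef>
#include <cstdint>

// Map a pixel value to its histogram bin; values outside the window are not counted (illustrative only).
inline int histogram_bin_sketch(std::uint8_t value, std::int32_t offset, std::uint32_t range, std::size_t num_bins)
{
    if(value < offset || value >= offset + static_cast<std::int32_t>(range))
    {
        return -1;
    }
    return static_cast<int>(((value - offset) * num_bins) / range);
}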
+
+template <typename T>
+using CLHistogramFixture = HistogramValidationFixture<CLTensor, CLAccessor, CLHistogram, T, CLDistribution1D>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLHistogramFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
+                                                                                                         DataType::U8)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLHistogramFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
+                                                                                                       DataType::U8)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/L2Normalize.cpp b/tests/validation/CL/L2NormalizeLayer.cpp
similarity index 84%
rename from tests/validation/CL/L2Normalize.cpp
rename to tests/validation/CL/L2NormalizeLayer.cpp
index 4b0820c..bc2374b 100644
--- a/tests/validation/CL/L2Normalize.cpp
+++ b/tests/validation/CL/L2NormalizeLayer.cpp
@@ -24,7 +24,7 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLL2Normalize.h"
+#include "arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/PaddingCalculator.h"
 #include "tests/datasets/ShapeDatasets.h"
@@ -32,7 +32,7 @@
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
 #include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/L2NormalizeFixture.h"
+#include "tests/validation/fixtures/L2NormalizeLayerFixture.h"
 
 namespace arm_compute
 {
@@ -48,20 +48,20 @@
 } // namespace
 
 TEST_SUITE(CL)
-TEST_SUITE(L2Normalize)
+TEST_SUITE(L2NormalizeLayer)
 
 template <typename T>
-using CLL2NormalizeFixture = L2NormalizeValidationFixture<CLTensor, CLAccessor, CLL2Normalize, T>;
+using CLL2NormalizeLayerFixture = L2NormalizeLayerValidationFixture<CLTensor, CLAccessor, CLL2NormalizeLayer, T>;
 
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLL2NormalizeFixture<float>, framework::DatasetMode::PRECOMMIT,
+FIXTURE_DATA_TEST_CASE(RunSmall, CLL2NormalizeLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
                        combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), framework::dataset::make("Epsilon", { 1e-12 })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLL2NormalizeFixture<float>, framework::DatasetMode::NIGHTLY,
+FIXTURE_DATA_TEST_CASE(RunLarge, CLL2NormalizeLayerFixture<float>, framework::DatasetMode::NIGHTLY,
                        combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), framework::dataset::make("Epsilon", { 1e-12 })))
 {
     // Validate output
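The Axis 0 / Epsilon 1e-12 datasets above pin down the operation being validated: each row is divided by the L2 norm of its elements, with epsilon guarding against division by zero. A scalar sketch under that assumption (illustrative; mirrors the reference, not the CL kernel):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// L2-normalize a single row (axis 0), guarding the norm with epsilon (illustrative only).
inline std::vector<float> l2_normalize_row_sketch(const std::vector<float> &row, float epsilon = 1e-12f)
{
    float sum_sq = 0.f;
    for(float v : row)
    {
        sum_sq += v * v;
    }
    const float        norm = std::sqrt(std::max(sum_sq, epsilon));
    std::vector<float> out(row.size());
    for(std::size_t i = 0; i < row.size(); ++i)
    {
        out[i] = row[i] / norm;
    }
    return out;
}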
diff --git a/tests/validation/CL/Magnitude.cpp b/tests/validation/CL/Magnitude.cpp
new file mode 100644
index 0000000..b002239
--- /dev/null
+++ b/tests/validation/CL/Magnitude.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLMagnitude.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/MagnitudeFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+template <typename T>
+AbsoluteTolerance<T> tolerance(MagnitudeType magnitude_type)
+{
+    return AbsoluteTolerance<T>((MagnitudeType::L1NORM == magnitude_type) ? 0 : 1);
+}
+} // namespace
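The tolerance above reflects the two magnitude definitions: L1NORM is pure integer arithmetic and should match exactly, while L2NORM takes a square root and rounds, so one unit of slack is allowed. A scalar sketch of both (illustrative only):

#include "arm_compute/core/Types.h"
#include <cmath>
#include <cstdint>
#include <cstdlib>

// Gradient magnitude for one pixel, in either norm (illustrative only).
inline std::int32_t magnitude_sketch(std::int32_t gx, std::int32_t gy, arm_compute::MagnitudeType type)
{
    if(type == arm_compute::MagnitudeType::L1NORM)
    {
        return std::abs(gx) + std::abs(gy);
    }
    const double sum_sq = static_cast<double>(gx) * gx + static_cast<double>(gy) * gy;
    return static_cast<std::int32_t>(std::lround(std::sqrt(sum_sq)));
}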
+
+TEST_SUITE(CL)
+TEST_SUITE(Magnitude)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::S16, DataType::S32 })),
+               shape, data_type)
+{
+    // Create tensors
+    CLTensor src1 = create_tensor<CLTensor>(shape, data_type);
+    CLTensor src2 = create_tensor<CLTensor>(shape, data_type);
+    CLTensor dst  = create_tensor<CLTensor>(shape, data_type);
+
+    ARM_COMPUTE_EXPECT(src1.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(src2.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function (default MagnitudeType::L2NORM)
+    CLMagnitude magnitude;
+    magnitude.configure(&src1, &src2, &dst);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+
+    validate(src1.info()->padding(), padding);
+    validate(src2.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+template <typename T>
+using CLMagnitudeFixture = MagnitudeValidationFixture<CLTensor, CLAccessor, CLMagnitude, T>;
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMagnitudeFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), framework::dataset::make("Format", Format::S16)),
+                                                                                                                 framework::dataset::make("MagnitudeType", { MagnitudeType::L1NORM, MagnitudeType::L2NORM })),
+                                                                                                         framework::dataset::make("UseFP16", false)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance<int16_t>(_magnitude_type));
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLMagnitudeFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), framework::dataset::make("Format", Format::S16)),
+                                                                                                               framework::dataset::make("MagnitudeType", { MagnitudeType::L1NORM, MagnitudeType::L2NORM })),
+                                                                                                       framework::dataset::make("UseFP16", false)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance<int16_t>(_magnitude_type));
+}
+TEST_SUITE_END() // S16
+
+TEST_SUITE(S32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMagnitudeFixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), framework::dataset::make("Format", Format::S32)),
+                                                                                                                 framework::dataset::make("MagnitudeType", { MagnitudeType::L1NORM, MagnitudeType::L2NORM })),
+                                                                                                         framework::dataset::make("UseFP16", false)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance<int32_t>(_magnitude_type));
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLMagnitudeFixture<int32_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), framework::dataset::make("Format", Format::S32)),
+                                                                                                               framework::dataset::make("MagnitudeType", { MagnitudeType::L1NORM, MagnitudeType::L2NORM })),
+                                                                                                       framework::dataset::make("UseFP16", false)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance<int32_t>(_magnitude_type));
+}
+TEST_SUITE_END() // S32
+
+TEST_SUITE_END() // Magnitude
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/Median3x3.cpp b/tests/validation/CL/Median3x3.cpp
new file mode 100644
index 0000000..39441f3
--- /dev/null
+++ b/tests/validation/CL/Median3x3.cpp
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLMedian3x3.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/Median3x3Fixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr unsigned int filter_size = 3;              /* Size of the kernel/filter in number of elements per dimension. */
+constexpr BorderSize   border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */
+} // namespace
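With border_size = 1 and BorderMode::UNDEFINED, the 3x3 filter cannot produce the outermost ring of pixels, which is what shape_to_valid_region(..., true, border_size) encodes below. A small sketch of that shrink (assumed semantics, illustrative type):

// Valid region after an undefined-border 3x3 filter: anchored one pixel in, two pixels smaller per dimension.
struct ValidRegionSketch
{
    int anchor_x;
    int anchor_y;
    int width;
    int height;
};

inline ValidRegionSketch shrink_valid_region_sketch(int width, int height, int border /* filter_size / 2 */)
{
    return ValidRegionSketch{ border, border, width - 2 * border, height - 2 * border };
}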
+
+TEST_SUITE(CL)
+TEST_SUITE(Median3x3)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::U8)),
+                                                                   datasets::BorderModes()),
+               shape, data_type, border_mode)
+{
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, data_type);
+    CLTensor dst = create_tensor<CLTensor>(shape, data_type);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    CLMedian3x3 median3x3;
+    median3x3.configure(&src, &dst, border_mode);
+
+    // Validate valid region
+    const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), border_size);
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding
+    PaddingCalculator calculator(shape.x(), 8);
+    calculator.set_border_size(1);
+    calculator.set_border_mode(border_mode);
+
+    const PaddingSize dst_padding = calculator.required_padding();
+
+    calculator.set_accessed_elements(16);
+    calculator.set_access_offset(-1);
+
+    const PaddingSize src_padding = calculator.required_padding();
+
+    validate(src.info()->padding(), src_padding);
+    validate(dst.info()->padding(), dst_padding);
+}
+
+template <typename T>
+using CLMedian3x3Fixture = Median3x3ValidationFixture<CLTensor, CLAccessor, CLMedian3x3, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMedian3x3Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::U8)),
+                                                                                                         datasets::BorderModes()))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLMedian3x3Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                               DataType::U8)),
+                                                                                                       datasets::BorderModes()))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/NormalizationLayer.cpp b/tests/validation/CL/NormalizationLayer.cpp
index 9db27ef..c133df0 100644
--- a/tests/validation/CL/NormalizationLayer.cpp
+++ b/tests/validation/CL/NormalizationLayer.cpp
@@ -49,28 +49,73 @@
 
 /** Tolerance for fixed point operations */
 constexpr AbsoluteTolerance<int8_t>  tolerance_qs8(2);
-constexpr AbsoluteTolerance<int16_t> tolerance_qs16(3);
+constexpr AbsoluteTolerance<int16_t> tolerance_qs16(4);
 
 /** Input data set. */
-const auto NormalizationDataset = combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("NormType", { NormType::IN_MAP_1D, NormType::CROSS_MAP })),
-                                                  framework::dataset::make("NormalizationSize", 3, 9, 2)),
-                                          framework::dataset::make("Beta", { 0.5f, 1.f, 2.f }));
+const auto NormalizationDataset = combine(combine(combine(combine(datasets::SmallShapes(), datasets::NormalizationTypes()),
+                                                          framework::dataset::make("NormalizationSize", 3, 9, 2)),
+                                                  framework::dataset::make("Beta", { 0.5f, 1.f, 2.f })),
+                                          framework::dataset::make("IsScaled", { true }));
+const auto NormalizationDatasetFP16 = combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("NormType", { NormType::IN_MAP_1D, NormType::CROSS_MAP })),
+                                                              framework::dataset::make("NormalizationSize", 3, 9, 2)),
+                                                      framework::dataset::make("Beta", { 0.5f, 1.f, 2.f })),
+                                              framework::dataset::make("IsScaled", { true }));
+
+const auto NormalizationDatasetFP32 = combine(combine(combine(combine(datasets::SmallShapes(), datasets::NormalizationTypes()),
+                                                              framework::dataset::make("NormalizationSize", 3, 9, 2)),
+                                                      framework::dataset::make("Beta", { 0.5f, 1.f, 2.f })),
+                                              framework::dataset::make("IsScaled", { true, false }));
 } // namespace
 
 TEST_SUITE(CL)
 TEST_SUITE(NormalizationLayer)
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching data type input/output
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching shapes
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Even normalization
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Non implemented IN_MAP_2D
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 4), // Mismatching fixed point position
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Window shrink
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32, 0),
+                                                     }),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16, 0),
+                                                       TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32, 0),
+                                                     })),
+               framework::dataset::make("NormInfo",  { NormalizationLayerInfo(NormType::IN_MAP_1D, 5),
+                                                       NormalizationLayerInfo(NormType::IN_MAP_1D, 5),
+                                                       NormalizationLayerInfo(NormType::IN_MAP_1D, 4),
+                                                       NormalizationLayerInfo(NormType::IN_MAP_2D, 5),
+                                                       NormalizationLayerInfo(NormType::IN_MAP_1D, 5),
+                                                       NormalizationLayerInfo(NormType::IN_MAP_1D, 5),
+                                                       NormalizationLayerInfo(NormType::CROSS_MAP, 5),
+                                                      })),
+               framework::dataset::make("Expected", { false, false, false, false, false, false, true })),
+               input_info, output_info, norm_info, expected)
+{
+    ARM_COMPUTE_EXPECT(bool(CLNormalizationLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), norm_info)) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
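Outside of the test, the same static validate() gate lets callers reject a configuration before allocating or configuring anything. A small usage sketch along the lines of the cases above (shapes and parameters are illustrative):

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h"

// Returns true when CLNormalizationLayer accepts the configuration (illustrative only).
inline bool normalization_config_is_valid_sketch()
{
    using namespace arm_compute;
    const TensorInfo             input(TensorShape(32U, 13U, 2U), 1, DataType::F32, 0);
    const TensorInfo             output(TensorShape(32U, 13U, 2U), 1, DataType::F32, 0);
    const NormalizationLayerInfo norm_info(NormType::CROSS_MAP, 5);
    // bool(Status) is true when validation succeeds, as in the Expected column above.
    return bool(CLNormalizationLayer::validate(&input, &output, norm_info));
}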
+
 template <typename T>
 using CLNormalizationLayerFixture = NormalizationValidationFixture<CLTensor, CLAccessor, CLNormalizationLayer, T>;
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(NormalizationDatasetFP16, framework::dataset::make("DataType", DataType::F16)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLNormalizationLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLNormalizationLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(NormalizationDatasetFP16, framework::dataset::make("DataType", DataType::F16)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16);
@@ -78,12 +123,12 @@
 TEST_SUITE_END()
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(NormalizationDatasetFP32, framework::dataset::make("DataType", DataType::F32)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLNormalizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLNormalizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(NormalizationDatasetFP32, framework::dataset::make("DataType", DataType::F32)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
diff --git a/tests/validation/CL/Phase.cpp b/tests/validation/CL/Phase.cpp
new file mode 100644
index 0000000..3cb6f68
--- /dev/null
+++ b/tests/validation/CL/Phase.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLPhase.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/PhaseFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr AbsoluteTolerance<uint8_t> tolerance_value(1);
+} // namespace
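Phase is an angle quantized to U8, so 0 and 255 are neighbouring values; that is why validate_wrap is used below with a tolerance of one. A sketch of a wrap-aware comparison in that spirit (illustrative, not the framework helper):

#include <algorithm>
#include <cstdint>
#include <cstdlib>

// Compare two U8 phase values on a 256-value circle (illustrative only).
inline bool within_wrap_tolerance_sketch(std::uint8_t target, std::uint8_t reference, int tolerance)
{
    const int diff = std::abs(static_cast<int>(target) - static_cast<int>(reference));
    return std::min(diff, 256 - diff) <= tolerance;
}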
+
+TEST_SUITE(CL)
+TEST_SUITE(Phase)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::S16, DataType::S32 })),
+               shape, data_type)
+{
+    // Create tensors
+    CLTensor src1 = create_tensor<CLTensor>(shape, data_type);
+    CLTensor src2 = create_tensor<CLTensor>(shape, data_type);
+    CLTensor dst  = create_tensor<CLTensor>(shape, DataType::U8);
+
+    ARM_COMPUTE_EXPECT(src1.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(src2.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    CLPhase phase;
+    phase.configure(&src1, &src2, &dst);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src1.info()->padding(), padding);
+    validate(src2.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+template <typename T>
+using CLPhaseFixture = PhaseValidationFixture<CLTensor, CLAccessor, CLPhase, T>;
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPhaseFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::Small2DShapes(), framework::dataset::make("Format", Format::S16)),
+                                                                                                     framework::dataset::make("PhaseType", { PhaseType::SIGNED, PhaseType::UNSIGNED })))
+{
+    // Validate output
+    validate_wrap(CLAccessor(_target), _reference, tolerance_value, 0);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPhaseFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large2DShapes(), framework::dataset::make("Format", Format::S16)),
+                                                                                                   framework::dataset::make("PhaseType", { PhaseType::SIGNED, PhaseType::UNSIGNED })))
+{
+    // Validate output
+    validate_wrap(CLAccessor(_target), _reference, tolerance_value, 0);
+}
+TEST_SUITE_END() // S16
+
+TEST_SUITE(S32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPhaseFixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::Small2DShapes(), framework::dataset::make("Format", Format::S32)),
+                                                                                                     framework::dataset::make("PhaseType", { PhaseType::SIGNED, PhaseType::UNSIGNED })))
+{
+    // Validate output
+    validate_wrap(CLAccessor(_target), _reference, tolerance_value, 0);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPhaseFixture<int32_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large2DShapes(), framework::dataset::make("Format", Format::S32)),
+                                                                                                   framework::dataset::make("PhaseType", { PhaseType::SIGNED, PhaseType::UNSIGNED })))
+{
+    // Validate output
+    validate_wrap(CLAccessor(_target), _reference, tolerance_value, 0);
+}
+TEST_SUITE_END() // S32
+
+TEST_SUITE_END() // Phase
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/PixelWiseMultiplication.cpp b/tests/validation/CL/PixelWiseMultiplication.cpp
new file mode 100644
index 0000000..031f10f
--- /dev/null
+++ b/tests/validation/CL/PixelWiseMultiplication.cpp
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ConvertPolicyDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/FixedPointPixelWiseMultiplicationFixture.h"
+#include "tests/validation/fixtures/PixelWiseMultiplicationFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+const float scale_unity = 1.f;
+const float scale_255   = 1.f / 255.f;
+
+// *INDENT-OFF*
+// clang-format off
+#define VALIDATE(TYPE, TOLERANCE) validate(CLAccessor(_target), _reference, AbsoluteTolerance<TYPE>(TOLERANCE), 0.f);
+
+#define PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(TEST_NAME, FIXTURE, MODE, SHAPES, DT1, DT2, SCALE, RP, VALIDATE) \
+    FIXTURE_DATA_TEST_CASE(TEST_NAME, CLPixelWiseMultiplication##FIXTURE, framework::DatasetMode::MODE,                   \
+                           combine(combine(combine(combine(combine(                                                       \
+                           datasets::SHAPES,                                                                              \
+                           framework::dataset::make("DataType1", DataType::DT1)),                                         \
+                           framework::dataset::make("DataType2", DataType::DT2)),                                         \
+                           framework::dataset::make("Scale", std::move(SCALE))),                                          \
+                           datasets::ConvertPolicies()),                                                                  \
+                           framework::dataset::make("RoundingPolicy", RoundingPolicy::RP)))                               \
+    {                                                                                                                     \
+        VALIDATE                                                                                                          \
+    }
+
+#define FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(TEST_NAME, FIXTURE, MODE, SHAPES, DT1, DT2, SCALE, RP, FPP_START, FPP_END) \
+    FIXTURE_DATA_TEST_CASE(TEST_NAME, CLFixedPointPixelWiseMultiplication##FIXTURE, framework::DatasetMode::MODE,                      \
+                           combine(combine(combine(combine(combine(combine(                                                            \
+                           datasets::SHAPES,                                                                                           \
+                           framework::dataset::make("DataType1", DataType::DT1)),                                                      \
+                           framework::dataset::make("DataType2", DataType::DT2)),                                                      \
+                           framework::dataset::make("Scale", std::move(SCALE))),                                                       \
+                           datasets::ConvertPolicies()),                                                                               \
+                           framework::dataset::make("RoundingPolicy", RoundingPolicy::RP)),                                            \
+                           framework::dataset::make("FixedPointPosition", FPP_START, FPP_END)))                                        \
+    {                                                                                                                                  \
+        validate(CLAccessor(_target), _reference);                                                                                     \
+    }
+// clang-format on
+// *INDENT-ON*
+} // namespace
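The two macros above paste the FIXTURE token onto CLPixelWiseMultiplication / CLFixedPointPixelWiseMultiplication to select one of the fixture aliases declared below, and build the usual shape / data-type / scale / policy dataset around it. The element-wise operation being validated, sketched for floating point (assumed semantics; convert and rounding policies only matter for the integer paths):

// out = in1 * in2 * scale; with scale_255 == 1/255 the product of two U8-range values stays in range (illustrative only).
inline float pixel_wise_multiply_sketch(float in1, float in2, float scale)
{
    return in1 * in2 * scale;
}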
+
+template <typename T>
+using CLPixelWiseMultiplicationToF16Fixture = PixelWiseMultiplicationValidationFixture<CLTensor, CLAccessor, CLPixelWiseMultiplication, T, half_float::half>;
+template <typename T>
+using CLPixelWiseMultiplicationToF32Fixture = PixelWiseMultiplicationValidationFixture<CLTensor, CLAccessor, CLPixelWiseMultiplication, T, float>;
+template <typename T>
+using CLPixelWiseMultiplicationToQS8Fixture = PixelWiseMultiplicationValidationFixture<CLTensor, CLAccessor, CLPixelWiseMultiplication, T, qint8_t>;
+template <typename T>
+using CLPixelWiseMultiplicationToQS16Fixture = PixelWiseMultiplicationValidationFixture<CLTensor, CLAccessor, CLPixelWiseMultiplication, T, qint16_t>;
+template <typename T>
+using CLFixedPointPixelWiseMultiplicationFixture = FixedPointPixelWiseMultiplicationValidationFixture<CLTensor, CLAccessor, CLPixelWiseMultiplication, T>;
+
+TEST_SUITE(CL)
+TEST_SUITE(PixelWiseMultiplication)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
+               framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),      // Window shrink
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid scale
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid data type combination
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),  // Mismatching data type
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),  // Mismatching fixed point
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),  // Invalid scale
+                                                      }),
+               framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
+                                                       TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS16, 2),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                     })),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                     })),
+               framework::dataset::make("Scale",{  2.f, 2.f, 2.f, -1.f, 1.f, 1.f, 1.f, 1.f, 3.f})),
+               framework::dataset::make("Expected", { true, true, false, false, false, false, false, false, false })),
+               input1_info, input2_info, output_info, scale, expected)
+{
+    bool has_error = bool(CLPixelWiseMultiplication::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), scale, ConvertPolicy::WRAP, RoundingPolicy::TO_ZERO));
+    ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+TEST_SUITE(F16toF16)
+
+TEST_SUITE(Scale255)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToF16Fixture<half_float::half>, PRECOMMIT, SmallShapes(), F16, F16, scale_255, TO_NEAREST_UP, VALIDATE(float, 1.f))
+TEST_SUITE_END() // Scale255
+
+TEST_SUITE_END() // F16toF16
+
+TEST_SUITE(F32toF32)
+
+TEST_SUITE(Scale255)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToF32Fixture<float>, PRECOMMIT, SmallShapes(), F32, F32, scale_255, TO_NEAREST_UP, VALIDATE(float, 1.f))
+TEST_SUITE_END() // Scale255
+
+TEST_SUITE_END() // F32toF32
+
+TEST_SUITE_END() // PixelWiseMultiplication
+
+TEST_SUITE(FixedPointPixelWiseMultiplication)
+
+TEST_SUITE(QS8)
+
+TEST_SUITE(ScaleUnity)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, Fixture<qint8_t>, PRECOMMIT, SmallShapes(), QS8, QS8, scale_unity, TO_ZERO, 1, 7)
+TEST_SUITE_END() // ScaleUnity
+
+TEST_SUITE_END() // QS8
+
+TEST_SUITE(QS16)
+
+TEST_SUITE(ScaleUnity)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, scale_unity, TO_ZERO, 1, 15)
+TEST_SUITE_END() // ScaleUnity
+
+TEST_SUITE_END() // QS16
+
+TEST_SUITE_END() // FixedPointPixelWiseMultiplication
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/PoolingLayer.cpp b/tests/validation/CL/PoolingLayer.cpp
index 44617f6..ee63937 100644
--- a/tests/validation/CL/PoolingLayer.cpp
+++ b/tests/validation/CL/PoolingLayer.cpp
@@ -43,23 +43,75 @@
 {
 namespace
 {
-/** Input data set for float data types */
-const auto PoolingLayerDatasetFP = combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { 2, 3, 7, 9 })),
-                                           framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) }));
+/** Input data set for floating-point data types */
+const auto PoolingLayerDatasetFP = combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { 2, 3, 4, 7, 9 })),
+                                                   framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })),
+                                           framework::dataset::make("ExcludePadding", { true, false }));
 
-/** Input data set for quantized data types */
-const auto PoolingLayerDatasetQS = combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), framework::dataset::make("PoolingSize", { 2, 3 })),
-                                           framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) }));
+/** Input data set for fixed-point data types */
+const auto PoolingLayerDatasetQS = combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), framework::dataset::make("PoolingSize", { 2, 3 })),
+                                                   framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })),
+                                           framework::dataset::make("ExcludePadding", { true, false }));
 
-constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for float types */
-constexpr AbsoluteTolerance<float> tolerance_f16(0.01f);  /**< Tolerance value for comparing reference's output against implementation's output for float types */
-constexpr AbsoluteTolerance<float> tolerance_qs8(3);      /**< Tolerance value for comparing reference's output against implementation's output for quantized input */
-constexpr AbsoluteTolerance<float> tolerance_qs16(6);     /**< Tolerance value for comparing reference's output against implementation's output for quantized input */
+/** Input data set for the 8-bit quantized asymmetric data type */
+const auto PoolingLayerDatasetQASYMM8 = combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), framework::dataset::make("PoolingSize", { 2, 3 })),
+                                                        framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })),
+                                                framework::dataset::make("ExcludePadding", { true, false }));
+
+constexpr AbsoluteTolerance<float>   tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for 32-bit floating-point type */
+constexpr AbsoluteTolerance<float>   tolerance_f16(0.01f);  /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */
+constexpr AbsoluteTolerance<float>   tolerance_qs16(6);     /**< Tolerance value for comparing reference's output against implementation's output for 16-bit fixed-point type */
+constexpr AbsoluteTolerance<float>   tolerance_qs8(3);      /**< Tolerance value for comparing reference's output against implementation's output for 8-bit fixed-point type */
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);  /**< Tolerance value for comparing reference's output against implementation's output for 8-bit asymmetric type */
 } // namespace
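The new ExcludePadding axis only matters for average-style pooling over padded windows: it decides whether the window sum is divided by the number of in-bounds elements or by the full kernel area. A scalar sketch of that choice (assumed semantics):

// Divisor for an average-pooling window that may overlap the padding (illustrative only).
inline float average_pool_divisor_sketch(int pool_size, int valid_elements, bool exclude_padding)
{
    return exclude_padding ? static_cast<float>(valid_elements) : static_cast<float>(pool_size * pool_size);
}
// e.g. a 3x3 window at the top-left corner with one pixel of padding on each of two sides covers
// 4 in-bounds elements: the average is sum / 4 with ExcludePadding = true and sum / 9 otherwise.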
 
 TEST_SUITE(CL)
 TEST_SUITE(PoolingLayer)
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Mismatching data type
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Window shrink
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 4),     // Mismatching fixed point position
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS16, 11),   // Window shrink
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Invalid pad/size combination
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Invalid pad/size combination
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8, 0), // Invalid parameters
+                                                       TensorInfo(TensorShape(15U, 13U, 5U), 1, DataType::F32, 0),     // Non-rectangular Global Pooling
+                                                       TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32, 0),     // Invalid output Global Pooling
+                                                       TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32, 0),
+                                                     }),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16, 0),
+                                                       TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::QS8, 5),
+                                                       TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::QS16, 11),
+                                                       TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(25U, 16U, 2U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8, 0),
+                                                       TensorInfo(TensorShape(1U, 1U, 5U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(2U, 2U, 5U), 1, DataType::F32, 0),
+                                                       TensorInfo(TensorShape(1U, 1U, 5U), 1, DataType::F32, 0),
+                                                     })),
+               framework::dataset::make("PoolInfo",  { PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)),
+                                                       PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)),
+                                                       PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)),
+                                                       PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)),
+                                                       PoolingLayerInfo(PoolingType::AVG, 2, PadStrideInfo(1, 1, 2, 0)),
+                                                       PoolingLayerInfo(PoolingType::AVG, 2, PadStrideInfo(1, 1, 0, 2)),
+                                                       PoolingLayerInfo(PoolingType::L2, 3, PadStrideInfo(1, 1, 0, 0)),
+                                                       PoolingLayerInfo(PoolingType::AVG),
+                                                       PoolingLayerInfo(PoolingType::MAX),
+                                                       PoolingLayerInfo(PoolingType::AVG),
+                                                      })),
+               framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false, true })),
+               input_info, output_info, pool_info, expected)
+{
+    ARM_COMPUTE_EXPECT(bool(CLPoolingLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pool_info)) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
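Several of the Expected = false rows above are shape mismatches; the pooled output size they are checked against follows the usual convolution-style formula (FLOOR rounding, the PadStrideInfo default), and a Global Pooling window spans the whole plane, so its output must be 1x1 per channel. A sketch of that formula (assumed semantics):

// Output extent of one pooled dimension (illustrative only; integer division acts as floor here).
inline int pooled_dimension_sketch(int in_dim, int pool_size, int stride, int pad_before, int pad_after)
{
    return (in_dim + pad_before + pad_after - pool_size) / stride + 1;
}
// e.g. global pooling of a 13x13 plane: (13 + 0 + 0 - 13) / 1 + 1 = 1 in each dimension.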
+
 template <typename T>
 using CLPoolingLayerFixture = PoolingLayerValidationFixture<CLTensor, CLAccessor, CLPoolingLayer, T>;
 
@@ -98,7 +150,7 @@
 template <typename T>
 using CLPoolingLayerFixedPointFixture = PoolingLayerValidationFixedPointFixture<CLTensor, CLAccessor, CLPoolingLayer, T>;
 
-TEST_SUITE(Quantized)
+TEST_SUITE(FixedPoint)
 TEST_SUITE(QS8)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQS,
                                                                                                                        framework::dataset::make("DataType", DataType::QS8))),
@@ -134,6 +186,31 @@
 TEST_SUITE_END()
 TEST_SUITE_END()
 
+TEST_SUITE(Quantized)
+
+template <typename T>
+using CLPoolingLayerQuantizedFixture = PoolingLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLPoolingLayer, T>;
+
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQASYMM8,
+                                                                                                                       framework::dataset::make("DataType", DataType::QASYMM8))),
+                                                                                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127),
+                                                                                                                       QuantizationInfo(7.f / 255, 123)
+                                                                                                                                                            })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetQASYMM8,
+                                                                                                                   framework::dataset::make("DataType", DataType::QASYMM8))),
+                                                                                                                   framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255, 0) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
 TEST_SUITE_END()
 TEST_SUITE_END()
 } // namespace validation
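
A note on the Validate cases added above: each case compares the boolean conversion of the returned status against an expected flag, rather than inspecting the error description itself. Below is a minimal, self-contained C++ sketch of that pattern; `Status` and `validate_pool` here are illustrative stand-ins written for this note, not the arm_compute types or the real validation logic.

#include <cassert>
#include <string>
#include <utility>

// Illustrative stand-in for a status type that is truthy when no error occurred.
struct Status
{
    Status() = default;
    Status(bool ok, std::string reason) : ok(ok), reason(std::move(reason)) {}
    explicit operator bool() const { return ok; }

    bool        ok{ true };
    std::string reason{};
};

// Illustrative validate(): rejects a pooling size larger than the input width.
Status validate_pool(int input_width, int pool_size)
{
    if(pool_size > input_width)
    {
        return Status(false, "pool size exceeds input width");
    }
    return Status();
}

int main()
{
    // Same comparison shape as the DATA_TEST_CASE: bool(validate(...)) == expected.
    assert(bool(validate_pool(27, 3)) == true);
    assert(bool(validate_pool(2, 7)) == false);
    return 0;
}
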
diff --git a/tests/validation/CL/Remap.cpp b/tests/validation/CL/Remap.cpp
new file mode 100644
index 0000000..2f08285
--- /dev/null
+++ b/tests/validation/CL/Remap.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLRemap.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/RemapFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr AbsoluteTolerance<uint8_t> tolerance_value(1);
+constexpr float                      tolerance_number = 0.2f;
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(Remap)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                           framework::dataset::make("DataType", DataType::U8)),
+                                                                   framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT })),
+               shape, policy, data_type, border_mode)
+{
+    CLTensor src   = create_tensor<CLTensor>(shape, data_type);
+    CLTensor map_x = create_tensor<CLTensor>(shape, DataType::F32);
+    CLTensor map_y = create_tensor<CLTensor>(shape, DataType::F32);
+    CLTensor dst   = create_tensor<CLTensor>(shape, data_type);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(map_x.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(map_y.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    CLRemap remap;
+    remap.configure(&src, &map_x, &map_y, &dst, policy, border_mode);
+
+    // Validate valid region
+    const ValidRegion dst_valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding
+    const int total_right  = ceil_to_multiple(shape[0], 4);
+    const int access_right = total_right + (((total_right - shape[0]) == 0) ? 1 : 0);
+
+    const PaddingSize read_padding(1, access_right - shape[0], 1, 1);
+    validate(src.info()->padding(), read_padding);
+
+    PaddingCalculator calculator(shape.x(), 4);
+    validate(dst.info()->padding(), calculator.required_padding());
+}
+
+template <typename T>
+using CLRemapFixture = RemapValidationFixture<CLTensor, CLAccessor, CLRemap, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLRemapFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                                                             framework::dataset::make("DataType",
+                                                                                                                     DataType::U8)),
+                                                                                                     framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLRemapFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                                                           framework::dataset::make("DataType",
+                                                                                                                   DataType::U8)),
+                                                                                                   framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
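
The source-padding check in the Remap configuration test above reduces to integer arithmetic: the right edge of the read window is the width rounded up to a multiple of four, plus one extra element when the width is already aligned. The stand-alone sketch below mirrors that formula; `ceil_to_multiple` here is a local helper written for illustration, not the library utility.

#include <cassert>

// Local helper for illustration: round value up to the next multiple of 'multiple'.
constexpr int ceil_to_multiple(int value, int multiple)
{
    return ((value + multiple - 1) / multiple) * multiple;
}

// Right padding expected for the remap source tensor, mirroring the test's formula.
constexpr int remap_right_padding(int width)
{
    const int total_right  = ceil_to_multiple(width, 4);
    const int access_right = total_right + (((total_right - width) == 0) ? 1 : 0);
    return access_right - width;
}

int main()
{
    assert(remap_right_padding(7) == 1); // 7 rounds up to 8; padding is 8 - 7
    assert(remap_right_padding(8) == 1); // already aligned, so one extra element is read
    assert(remap_right_padding(9) == 3); // 9 rounds up to 12; padding is 12 - 9
    return 0;
}
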
diff --git a/tests/validation/CL/ReshapeLayer.cpp b/tests/validation/CL/ReshapeLayer.cpp
index 57027a9..740b53e 100644
--- a/tests/validation/CL/ReshapeLayer.cpp
+++ b/tests/validation/CL/ReshapeLayer.cpp
@@ -67,6 +67,14 @@
 TEST_SUITE_END()
 
 TEST_SUITE(Integer)
+TEST_SUITE(U8)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLReshapeLayerFixture<uint8_t>, framework::DatasetMode::ALL, combine(datasets::SmallReshapeLayerDataset(), framework::dataset::make("DataType", { DataType::U8, DataType::QASYMM8 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
 TEST_SUITE(S8)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLReshapeLayerFixture<int8_t>, framework::DatasetMode::ALL, combine(datasets::SmallReshapeLayerDataset(), framework::dataset::make("DataType", DataType::S8)))
 {
diff --git a/tests/validation/CL/Scale.cpp b/tests/validation/CL/Scale.cpp
index f43f2ae..aeda33b 100644
--- a/tests/validation/CL/Scale.cpp
+++ b/tests/validation/CL/Scale.cpp
@@ -29,6 +29,7 @@
 #include "tests/CL/CLAccessor.h"
 #include "tests/PaddingCalculator.h"
 #include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/SamplingPolicyDataset.h"
 #include "tests/datasets/ShapeDatasets.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
@@ -66,10 +67,11 @@
 TEST_SUITE(CL)
 TEST_SUITE(Scale)
 
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::MediumShapes(), datasets::LargeShapes()), ScaleDataTypes),
-                                                                           framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
-                                                                   datasets::BorderModes()),
-               shape, data_type, policy, border_mode)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(combine(concat(datasets::MediumShapes(), datasets::LargeShapes()), ScaleDataTypes),
+                                                                                   framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                           datasets::BorderModes()),
+                                                                   datasets::SamplingPolicies()),
+               shape, data_type, policy, border_mode, sampling_policy)
 {
     std::mt19937                           generator(library->seed());
     std::uniform_real_distribution<float>  distribution_float(0.25, 2);
@@ -90,18 +92,20 @@
 
     // Create and configure function
     CLScale clscale;
-    clscale.configure(&src, &dst, policy, border_mode, constant_border_value);
+    clscale.configure(&src, &dst, policy, border_mode, constant_border_value, sampling_policy);
+
+    // Get border size depending on border mode
+    const BorderSize border_size(border_mode == BorderMode::UNDEFINED ? 0 : 1);
 
     // Validate valid region
-    const ValidRegion dst_valid_region = calculate_valid_region_scale(*(src.info()), shape_scaled, policy, BorderSize(1), (border_mode == BorderMode::UNDEFINED));
-
+    const ValidRegion dst_valid_region = calculate_valid_region_scale(*(src.info()), shape_scaled, policy, border_size, (border_mode == BorderMode::UNDEFINED));
     validate(dst.info()->valid_region(), dst_valid_region);
 
     // Validate padding
     PaddingCalculator calculator(shape_scaled.x(), 4);
     calculator.set_border_mode(border_mode);
 
-    const PaddingSize read_padding(1);
+    const PaddingSize read_padding(border_size);
     const PaddingSize write_padding = calculator.required_padding(PaddingCalculator::Option::EXCLUDE_BORDER);
     validate(src.info()->padding(), read_padding);
     validate(dst.info()->padding(), write_padding);
@@ -112,9 +116,10 @@
 
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)),
-                                                                                                     framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
-                                                                                             datasets::BorderModes()))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)),
+                                                                                                             framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                                                     datasets::BorderModes()),
+                                                                                             datasets::SamplingPolicies()))
 {
     //Create valid region
     TensorInfo        src_info(_shape, 1, _data_type);
@@ -123,9 +128,10 @@
     // Validate output
     validate(CLAccessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)),
-                                                                                                         framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
-                                                                                                 datasets::BorderModes()))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)),
+                                                                                                                 framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                                                         datasets::BorderModes()),
+                                                                                                 datasets::SamplingPolicies()))
 {
     //Create valid region
     TensorInfo        src_info(_shape, 1, _data_type);
@@ -136,9 +142,10 @@
 }
 TEST_SUITE_END()
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)),
-                                                                                                    framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
-                                                                                            datasets::BorderModes()))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)),
+                                                                                                            framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                                                    datasets::BorderModes()),
+                                                                                            datasets::SamplingPolicies()))
 {
     //Create valid region
     TensorInfo        src_info(_shape, 1, _data_type);
@@ -147,10 +154,11 @@
     // Validate output
     validate(CLAccessor(_target), _reference, valid_region, tolerance_f16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
-                                                                                                                DataType::F16)),
-                                                                                                        framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
-                                                                                                datasets::BorderModes()))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                        DataType::F16)),
+                                                                                                                framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                                                        datasets::BorderModes()),
+                                                                                                datasets::SamplingPolicies()))
 {
     //Create valid region
     TensorInfo        src_info(_shape, 1, _data_type);
@@ -164,9 +172,10 @@
 
 TEST_SUITE(Integer)
 TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::U8)),
-                                                                                                       framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
-                                                                                               datasets::BorderModes()))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::U8)),
+                                                                                                               framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                                                       datasets::BorderModes()),
+                                                                                               datasets::SamplingPolicies()))
 {
     //Create valid region
     TensorInfo        src_info(_shape, 1, _data_type);
@@ -175,9 +184,10 @@
     // Validate output
     validate(CLAccessor(_target), _reference, valid_region, tolerance_u8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::U8)),
-                                                                                                           framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
-                                                                                                   datasets::BorderModes()))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::U8)),
+                                                                                                                   framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                                                           datasets::BorderModes()),
+                                                                                                   datasets::SamplingPolicies()))
 {
     //Create valid region
     TensorInfo        src_info(_shape, 1, _data_type);
@@ -188,9 +198,10 @@
 }
 TEST_SUITE_END()
 TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::S16)),
-                                                                                                       framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
-                                                                                               datasets::BorderModes()))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::S16)),
+                                                                                                               framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                                                       datasets::BorderModes()),
+                                                                                               datasets::SamplingPolicies()))
 {
     //Create valid region
     TensorInfo        src_info(_shape, 1, _data_type);
@@ -199,9 +210,11 @@
     // Validate output
     validate(CLAccessor(_target), _reference, valid_region, tolerance_s16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::S16)),
-                                                                                                           framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
-                                                                                                   datasets::BorderModes()))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::S16)),
+                                                                                                                   framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                                                           datasets::BorderModes()),
+                                                                                                   datasets::SamplingPolicies()))
 {
     //Create valid region
     TensorInfo        src_info(_shape, 1, _data_type);
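
The new SamplingPolicies dimension exercised in the Scale tests above changes where a scaled output pixel samples its source coordinate. As a rough, self-contained illustration of the two conventions these policy names commonly denote (this is not a reproduction of the CLScale kernels): top-left sampling maps out_x to out_x * scale, while center sampling maps it to (out_x + 0.5) * scale - 0.5.

#include <cassert>
#include <cmath>

// Illustrative enum mirroring the dataset names; not the arm_compute type.
enum class SamplingPolicy { TOP_LEFT, CENTER };

// Map an output coordinate back to a source coordinate for a given scale ratio.
float source_coordinate(int out_x, float scale, SamplingPolicy policy)
{
    return (policy == SamplingPolicy::CENTER) ? (out_x + 0.5f) * scale - 0.5f
                                              : out_x * scale;
}

int main()
{
    // Upscaling by 2x: the scale ratio src/dst is 0.5.
    const float scale = 0.5f;
    assert(std::fabs(source_coordinate(0, scale, SamplingPolicy::TOP_LEFT)) < 1e-6f);
    assert(std::fabs(source_coordinate(0, scale, SamplingPolicy::CENTER) - (-0.25f)) < 1e-6f);
    return 0;
}
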
diff --git a/tests/validation/CL/Scharr.cpp b/tests/validation/CL/Scharr.cpp
new file mode 100644
index 0000000..1b44a57
--- /dev/null
+++ b/tests/validation/CL/Scharr.cpp
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLScharr3x3.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/GradientDimensionDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/ScharrFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(Scharr)
+
+TEST_SUITE(W3x3)
+using CLScharr3x3Fixture = ScharrValidationFixture<CLTensor, CLAccessor, CLScharr3x3, uint8_t, int16_t>;
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                   Format::U8)),
+               shape, border_mode, format)
+{
+    // Generate a random constant value
+    std::mt19937                           gen(library->seed());
+    std::uniform_int_distribution<uint8_t> int_dist(0, 255);
+    const uint8_t                          constant_border_value = int_dist(gen);
+
+    // Create tensors
+    CLTensor src   = create_tensor<CLTensor>(shape, data_type_from_format(format));
+    CLTensor dst_x = create_tensor<CLTensor>(shape, DataType::S16);
+    CLTensor dst_y = create_tensor<CLTensor>(shape, DataType::S16);
+
+    src.info()->set_format(format);
+    dst_x.info()->set_format(Format::S16);
+    dst_y.info()->set_format(Format::S16);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst_x.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst_y.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure Scharr 3x3 function
+    CLScharr3x3 scharr;
+    scharr.configure(&src, &dst_x, &dst_y, border_mode, constant_border_value);
+
+    // Validate valid region
+    constexpr BorderSize border_size{ 1 };
+    const ValidRegion    dst_valid_region = shape_to_valid_region(shape, border_mode == BorderMode::UNDEFINED, border_size);
+
+    validate(dst_x.info()->valid_region(), dst_valid_region);
+    validate(dst_y.info()->valid_region(), dst_valid_region);
+
+    // Validate padding
+    PaddingCalculator calculator(shape.x(), 8);
+
+    calculator.set_border_mode(border_mode);
+    calculator.set_border_size(1);
+
+    const PaddingSize dst_padding = calculator.required_padding();
+
+    calculator.set_accessed_elements(16);
+    calculator.set_access_offset(-1);
+
+    const PaddingSize src_padding = calculator.required_padding();
+
+    validate(src.info()->padding(), src_padding);
+    validate(dst_x.info()->padding(), dst_padding);
+    validate(dst_y.info()->padding(), dst_padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLScharr3x3Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                        Format::U8)),
+                                                                                                datasets::GradientDimensions()))
+{
+    // Validate output
+    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
+    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
+
+    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
+    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLScharr3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                      Format::U8)),
+                                                                                              datasets::GradientDimensions()))
+{
+    // Validate output
+    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
+    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
+
+    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
+    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
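
For readers unfamiliar with the operator validated above: the 3x3 Scharr filter uses the standard coefficient pairs 3 and 10. The sketch below computes the horizontal and vertical responses at a single interior pixel of a tiny image, purely to show what kind of values the fixture's reference produces; border handling and the library's exact reference implementation are not reproduced here.

#include <array>
#include <cassert>
#include <cstdint>

// Standard 3x3 Scharr coefficients for the horizontal (Gx) and vertical (Gy) gradients.
constexpr std::array<std::array<int, 3>, 3> scharr_x{ { { -3, 0, 3 }, { -10, 0, 10 }, { -3, 0, 3 } } };
constexpr std::array<std::array<int, 3>, 3> scharr_y{ { { -3, -10, -3 }, { 0, 0, 0 }, { 3, 10, 3 } } };

// Apply a 3x3 kernel at interior pixel (x, y) of an 8-bit image (no border handling).
template <std::size_t W, std::size_t H>
int16_t apply_3x3(const std::array<std::array<uint8_t, W>, H> &img, int x, int y,
                  const std::array<std::array<int, 3>, 3> &k)
{
    int sum = 0;
    for(int dy = -1; dy <= 1; ++dy)
    {
        for(int dx = -1; dx <= 1; ++dx)
        {
            sum += k[dy + 1][dx + 1] * img[y + dy][x + dx];
        }
    }
    return static_cast<int16_t>(sum);
}

int main()
{
    // A vertical step edge: the horizontal gradient responds, the vertical one does not.
    const std::array<std::array<uint8_t, 3>, 3> edge{ { { 0, 0, 255 }, { 0, 0, 255 }, { 0, 0, 255 } } };
    assert(apply_3x3(edge, 1, 1, scharr_x) == 16 * 255);
    assert(apply_3x3(edge, 1, 1, scharr_y) == 0);
    return 0;
}
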
diff --git a/tests/validation/CL/Sobel.cpp b/tests/validation/CL/Sobel.cpp
index cde93e7..16f411d 100644
--- a/tests/validation/CL/Sobel.cpp
+++ b/tests/validation/CL/Sobel.cpp
@@ -100,8 +100,50 @@
     validate(dst_y.info()->padding(), dst_padding);
 }
 
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel3x3Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                               Format::U8)))
+TEST_SUITE(X)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel3x3Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                       Format::U8)),
+                                                                                               framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
+{
+    // Validate output
+    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
+    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                     Format::U8)),
+                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
+{
+    // Validate output
+    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
+    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(Y)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel3x3Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                       Format::U8)),
+                                                                                               framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
+{
+    // Validate output
+    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
+    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                     Format::U8)),
+                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
+{
+    // Validate output
+    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
+    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(XY)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel3x3Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                       Format::U8)),
+                                                                                               framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
 {
     // Validate output
     ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
@@ -111,8 +153,9 @@
     validate(CLAccessor(_target.second), _reference.second, valid_region_y);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                             Format::U8)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                     Format::U8)),
+                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
 {
     // Validate output
     ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
@@ -122,6 +165,7 @@
     validate(CLAccessor(_target.second), _reference.second, valid_region_y);
 }
 TEST_SUITE_END()
+TEST_SUITE_END()
 
 TEST_SUITE(W5x5)
 using CLSobel5x5Fixture = SobelValidationFixture<CLTensor, CLAccessor, CLSobel5x5, uint8_t, int16_t>;
@@ -176,8 +220,48 @@
     validate(dst_y.info()->padding(), dst_padding);
 }
 
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel5x5Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                               Format::U8)))
+TEST_SUITE(X)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel5x5Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                       Format::U8)),
+                                                                                               framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
+{
+    // Validate output
+    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
+    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                     Format::U8)),
+                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
+{
+    // Validate output
+    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
+    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
+}
+TEST_SUITE_END()
+TEST_SUITE(Y)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel5x5Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                       Format::U8)),
+                                                                                               framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
+{
+    // Validate output
+    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
+    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                     Format::U8)),
+                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
+{
+    // Validate output
+    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
+    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
+}
+TEST_SUITE_END()
+TEST_SUITE(XY)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel5x5Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                       Format::U8)),
+                                                                                               framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
 {
     // Validate output
     ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
@@ -187,8 +271,9 @@
     validate(CLAccessor(_target.second), _reference.second, valid_region_y);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                             Format::U8)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                     Format::U8)),
+                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
 {
     // Validate output
     ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
@@ -198,6 +283,7 @@
     validate(CLAccessor(_target.second), _reference.second, valid_region_y);
 }
 TEST_SUITE_END()
+TEST_SUITE_END()
 
 TEST_SUITE(W7x7)
 using CLSobel7x7Fixture = SobelValidationFixture<CLTensor, CLAccessor, CLSobel7x7, uint8_t, int32_t>;
@@ -252,9 +338,48 @@
     validate(dst_x.info()->padding(), dst_padding);
     validate(dst_y.info()->padding(), dst_padding);
 }
+TEST_SUITE(X)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel7x7Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                       Format::U8)),
+                                                                                               framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
+{
+    // Validate output
+    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
+    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
+}
 
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel7x7Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                               Format::U8)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                     Format::U8)),
+                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
+{
+    // Validate output
+    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
+    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
+}
+TEST_SUITE_END()
+TEST_SUITE(Y)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel7x7Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                       Format::U8)),
+                                                                                               framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
+{
+    // Validate output
+    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
+    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                     Format::U8)),
+                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
+{
+    // Validate output
+    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
+    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
+}
+TEST_SUITE_END()
+TEST_SUITE(XY)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel7x7Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                       Format::U8)),
+                                                                                               framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
 {
     // Validate output
     ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
@@ -264,8 +389,9 @@
     validate(CLAccessor(_target.second), _reference.second, valid_region_y);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                             Format::U8)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                     Format::U8)),
+                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
 {
     // Validate output
     ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
@@ -275,6 +401,7 @@
     validate(CLAccessor(_target.second), _reference.second, valid_region_y);
 }
 TEST_SUITE_END()
+TEST_SUITE_END()
 
 TEST_SUITE_END()
 TEST_SUITE_END()
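
The valid-region checks repeated throughout the Sobel cases above follow one rule: with BorderMode::UNDEFINED, a filter with border size N cannot produce meaningful values in the outer N-pixel frame, so the valid region shrinks by N on every side; with a filled border the full shape stays valid. The sketch below captures that bookkeeping with a simplified 2-D region struct rather than the library's ValidRegion type.

#include <cassert>

// Simplified 2-D valid region: anchor (top-left corner) plus extent, for illustration only.
struct Region2D
{
    int anchor_x, anchor_y;
    int width, height;
};

// Shrink the region by the filter border when the border pixels are left undefined.
Region2D valid_region(int width, int height, bool border_undefined, int border_size)
{
    if(!border_undefined)
    {
        return { 0, 0, width, height };
    }
    return { border_size, border_size, width - 2 * border_size, height - 2 * border_size };
}

int main()
{
    // 7x7 Sobel (border size 3) on a 64x32 image with an undefined border.
    const Region2D r = valid_region(64, 32, true, 3);
    assert(r.anchor_x == 3 && r.anchor_y == 3);
    assert(r.width == 58 && r.height == 26);

    // With a constant or replicated border the whole output remains valid.
    const Region2D full = valid_region(64, 32, false, 3);
    assert(full.width == 64 && full.height == 32);
    return 0;
}
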
diff --git a/tests/validation/CL/SoftmaxLayer.cpp b/tests/validation/CL/SoftmaxLayer.cpp
index c469b8a..62a689d 100644
--- a/tests/validation/CL/SoftmaxLayer.cpp
+++ b/tests/validation/CL/SoftmaxLayer.cpp
@@ -21,6 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
+#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
@@ -49,9 +50,13 @@
 /** Tolerance for fixed point operations */
 constexpr AbsoluteTolerance<int16_t> tolerance_fixed_point(2);
 
+/** Tolerance for quantized operations */
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);
+
 /** CNN data types */
 const auto CNNDataTypes = framework::dataset::make("DataType",
 {
+    DataType::QASYMM8,
     DataType::F16,
     DataType::F32,
     DataType::QS8,
@@ -62,14 +67,15 @@
 TEST_SUITE(CL)
 TEST_SUITE(SoftmaxLayer)
 
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), CNNDataTypes), shape, data_type)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::SoftmaxLayerSmallShapes(), datasets::SoftmaxLayerLargeShapes()), CNNDataTypes), shape, data_type)
 {
-    // Set fixed point position data type allowed
-    const int fixed_point_position = is_data_type_fixed_point(data_type) ? 3 : 0;
+    // Set fixed point position and quantization info where applicable
+    const int              fixed_point_position = is_data_type_fixed_point(data_type) ? 3 : 0;
+    const QuantizationInfo quantization_info    = is_data_type_quantized_asymmetric(data_type) ? QuantizationInfo(1.f / 255.f, 0) : QuantizationInfo();
 
     // Create tensors
-    CLTensor src = create_tensor<CLTensor>(shape, data_type, 1, fixed_point_position);
-    CLTensor dst = create_tensor<CLTensor>(shape, data_type, 1, fixed_point_position);
+    CLTensor src = create_tensor<CLTensor>(shape, data_type, 1, fixed_point_position, quantization_info);
+    CLTensor dst = create_tensor<CLTensor>(shape, data_type, 1, fixed_point_position, QuantizationInfo(1.f / 256.f, 0));
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -83,23 +89,75 @@
     validate(src.info()->valid_region(), valid_region);
     validate(dst.info()->valid_region(), valid_region);
 
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
+    // Get reduction kernel info
+    CLLogits1DMaxShiftExpSumKernel::ParallelReductionInfo reduction_info = CLLogits1DMaxShiftExpSumKernel::is_parallel_reduction(shape.x());
+
+    // Validate src padding
+    // Legacy path, used only by the quantized asymmetric data type
+    if(is_data_type_quantized_asymmetric(data_type))
+    {
+        const PaddingSize padding_src = PaddingCalculator(shape.x(), 16).required_padding();
+        validate(src.info()->padding(), padding_src);
+    }
+    else
+    {
+        const PaddingSize padding_src = PaddingCalculator(shape.x(), std::get<1>(reduction_info)).required_padding();
+        validate(src.info()->padding(), padding_src);
+    }
+
+    // Validate dst padding
+    const PaddingSize padding_dst = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(dst.info()->padding(), padding_dst);
 }
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Mismatching data types
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Mismatching shapes
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 2), // Mismatching fixed point
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8, // Invalid output quantization info
+                                                                  QuantizationInfo(1.f/256, 12)),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Window shrink
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8,
+                                                                  QuantizationInfo(1.f/256, 12)),
+                                                      }),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16),
+                                                       TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8,
+                                                                  QuantizationInfo(1.f/256, 12)),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8,
+                                                                  QuantizationInfo(1.f/256, 0)),
+                                                     })),
+               framework::dataset::make("Expected", { false, false, false, false, false, true, true, true })),
+               input_info, output_info, expected)
+{
+    ARM_COMPUTE_EXPECT(bool(CLSoftmaxLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
 template <typename T>
 using CLSoftmaxLayerFixture = SoftmaxValidationFixture<CLTensor, CLAccessor, CLSoftmaxLayer, T>;
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SoftmaxLayerSmallShapes(),
+                                                                                                           framework::dataset::make("DataType", DataType::F16)),
+                                                                                                   framework::dataset::make("Beta", { 1.0f, 2.0f })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSoftmaxLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLSoftmaxLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(),
+                                                                                                               framework::dataset::make("DataType", DataType::F16)),
+                                                                                                       framework::dataset::make("Beta", { 1.0f, 2.0f })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16);
@@ -107,12 +165,16 @@
 TEST_SUITE_END()
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SoftmaxLayerSmallShapes(),
+                                                                                                            framework::dataset::make("DataType", DataType::F32)),
+                                                                                                    framework::dataset::make("Beta", { 1.0f, 2.0f })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSoftmaxLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLSoftmaxLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(),
+                                                                                                                framework::dataset::make("DataType", DataType::F32)),
+                                                                                                        framework::dataset::make("Beta", { 1.0f, 2.0f })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
@@ -123,17 +185,17 @@
 template <typename T>
 using CLSoftmaxLayerFixedPointFixture = SoftmaxValidationFixedPointFixture<CLTensor, CLAccessor, CLSoftmaxLayer, T>;
 
-TEST_SUITE(Quantized)
+TEST_SUITE(FixedPoint)
 TEST_SUITE(QS8)
 // Testing for fixed point position [1,6) as reciprocal limits the maximum fixed point position to 5
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
-                                                                                                                     DataType::QS8)),
-                                                                                                                     framework::dataset::make("FractionalBits", 1, 6)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(datasets::SoftmaxLayerSmallShapes(), framework::dataset::make("DataType",
+                                                                                                                       DataType::QS8)),
+                                                                                                               framework::dataset::make("FractionalBits", 1, 6)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fixed_point);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSoftmaxLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunLarge, CLSoftmaxLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::QS8)),
                                                                                                                    framework::dataset::make("FractionalBits", 1, 6)))
 {
@@ -144,15 +206,15 @@
 
 TEST_SUITE(QS16)
 // Testing for fixed point position [1,14) as reciprocal limits the maximum fixed point position to 14
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(),
-                                                                                                                      framework::dataset::make("DataType",
-                                                                                                                              DataType::QS16)),
-                                                                                                                      framework::dataset::make("FractionalBits", 1, 14)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerFixedPointFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(datasets::SoftmaxLayerSmallShapes(),
+                                                                                                                        framework::dataset::make("DataType",
+                                                                                                                                DataType::QS16)),
+                                                                                                                framework::dataset::make("FractionalBits", 1, 14)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fixed_point);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSoftmaxLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLSoftmaxLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(),
                                                                                                                     framework::dataset::make("DataType",
                                                                                                                             DataType::QS16)),
                                                                                                                     framework::dataset::make("FractionalBits", 1, 14)))
@@ -163,6 +225,30 @@
 TEST_SUITE_END()
 TEST_SUITE_END()
 
+template <typename T>
+using CLSoftmaxLayerQuantizedFixture = SoftmaxValidationQuantizedFixture<CLTensor, CLAccessor, CLSoftmaxLayer, T>;
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
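+// (Note: a QASYMM8 value q represents approximately scale * (q - offset), so QuantizationInfo(0.5f, -10)
+//  below maps q to 0.5f * (q + 10); Beta scales the softmax inputs before exponentiation.)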
+FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(datasets::SoftmaxLayerSmallShapes(),
+                                                                                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                                                                                                               combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }),
+                                                                                                                       framework::dataset::make("Beta", { 1.0f, 2.f }))))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLSoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(),
+                                                                                                                   framework::dataset::make("DataType", DataType::QASYMM8)),
+                                                                                                                   combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }),
+                                                                                                                           framework::dataset::make("Beta", { 1.0f, 2.0f }))))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
 TEST_SUITE_END()
 TEST_SUITE_END()
 } // namespace validation
diff --git a/tests/validation/CL/Transpose.cpp b/tests/validation/CL/Transpose.cpp
new file mode 100644
index 0000000..aeb1a94
--- /dev/null
+++ b/tests/validation/CL/Transpose.cpp
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLTranspose.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/TransposeFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(Transpose)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), framework::dataset::make("DataType", { DataType::S8, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, DataType::F32 })),
+               shape, data_type)
+{
+    // The output shape swaps the rows and columns of the original shape
+    TensorShape output_shape{ shape[1], shape[0] };
+
+    // Create tensors
+    CLTensor ref_src = create_tensor<CLTensor>(shape, data_type);
+    CLTensor dst     = create_tensor<CLTensor>(output_shape, data_type);
+
+    // Create and Configure function
+    CLTranspose trans;
+    trans.configure(&ref_src, &dst);
+
+    // Validate dst region
+    const ValidRegion valid_region = shape_to_valid_region(output_shape);
+    validate(dst.info()->valid_region(), valid_region);
+}
+
+template <typename T>
+using CLTransposeFixture = TransposeValidationFixture<CLTensor, CLAccessor, CLTranspose, T>;
+
+TEST_SUITE(U8)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLTransposeFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small1DShapes(), datasets::Small2DShapes()),
+                                                                                                         framework::dataset::make("DataType", DataType::U8)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLTransposeFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(concat(datasets::Large1DShapes(), datasets::Large2DShapes()),
+                                                                                                       framework::dataset::make("DataType", DataType::U8)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(U16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLTransposeFixture<uint16_t>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small1DShapes(), datasets::Small2DShapes()),
+                                                                                                          framework::dataset::make("DataType", DataType::U16)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLTransposeFixture<uint16_t>, framework::DatasetMode::NIGHTLY, combine(concat(datasets::Large1DShapes(), datasets::Large2DShapes()),
+                                                                                                        framework::dataset::make("DataType", DataType::U16)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(U32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLTransposeFixture<uint32_t>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small1DShapes(), datasets::Small2DShapes()),
+                                                                                                          framework::dataset::make("DataType", DataType::U32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLTransposeFixture<uint32_t>, framework::DatasetMode::NIGHTLY, combine(concat(datasets::Large1DShapes(), datasets::Large2DShapes()),
+                                                                                                        framework::dataset::make("DataType", DataType::U32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/WarpAffine.cpp b/tests/validation/CL/WarpAffine.cpp
index 9db2cca..7f3001c 100644
--- a/tests/validation/CL/WarpAffine.cpp
+++ b/tests/validation/CL/WarpAffine.cpp
@@ -35,9 +35,9 @@
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/CPP/Utils.h"
 #include "tests/validation/Validation.h"
 #include "tests/validation/fixtures/WarpAffineFixture.h"
+#include "tests/validation/reference/Utils.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/CL/WarpPerspective.cpp b/tests/validation/CL/WarpPerspective.cpp
index 2edf911..a868e16 100644
--- a/tests/validation/CL/WarpPerspective.cpp
+++ b/tests/validation/CL/WarpPerspective.cpp
@@ -105,20 +105,14 @@
                                                                                                                        framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
                                                                                                                datasets::BorderModes()))
 {
-    // Create the valid mask Tensor
-    RawTensor valid_mask(_reference.shape(), _reference.data_type());
-
-    validate(CLAccessor(_target), _reference, valid_mask, tolerance_value, tolerance_number);
+    validate(CLAccessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWarpPerspectiveFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
                                                                                                                      DataType::U8)),
                                                                                                                      framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
                                                                                                              datasets::BorderModes()))
 {
-    // Create the valid mask Tensor
-    RawTensor valid_mask{ _reference.shape(), _reference.data_type() };
-
-    validate(CLAccessor(_target), _reference, valid_mask, tolerance_value, tolerance_number);
+    validate(CLAccessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number);
 }
 
 TEST_SUITE_END()
diff --git a/tests/validation/CPP/ConvolutionLayer.cpp b/tests/validation/CPP/ConvolutionLayer.cpp
deleted file mode 100644
index 656cd2e..0000000
--- a/tests/validation/CPP/ConvolutionLayer.cpp
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "ConvolutionLayer.h"
-
-#include "tests/validation/FixedPoint.h"
-#include "tests/validation/Helpers.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace reference
-{
-namespace
-{
-inline bool is_valid_pixel(int i, int min, int max)
-{
-    return (i >= min && i < max);
-}
-
-// 3D convolution for floating point type
-template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0>
-void convolution3d(const T *in, const T *weights, const T *bias, T *out, int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, int fixed_point_position)
-{
-    ARM_COMPUTE_UNUSED(fixed_point_position);
-
-    const int half_width_weights  = width_weights / 2;
-    const int half_height_weights = height_weights / 2;
-
-    // Reset accumulator
-    T acc(0);
-
-    // Compute a 2D convolution for each IFM and accumulate the result
-    for(int ifm = 0; ifm < depth_in; ++ifm)
-    {
-        // Compute the offset for the input slice
-        const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in;
-
-        // Compute 2D convolution
-        for(int yk = -half_height_weights; yk <= half_height_weights; ++yk)
-        {
-            for(int xk = -half_width_weights; xk <= half_width_weights; ++xk)
-            {
-                // Check if the pixel is out-of-bound
-                if(is_valid_pixel(xi + xk, 0, width_in) && is_valid_pixel(yi + yk, 0, height_in))
-                {
-                    const int idx = xk + half_width_weights;
-                    const int idy = yk + half_height_weights;
-
-                    const T i_value = in[offset_slice_in + xk + yk * width_in];
-                    const T w_value = weights[idx + idy * width_weights + ifm * width_weights * height_weights];
-
-                    acc += i_value * w_value;
-                }
-            }
-        }
-    }
-
-    // Accumulate the bias and store the result
-    *out = acc + (*bias);
-}
-
-// 3D convolution for fixed point type
-template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
-void convolution3d(const T *in, const T *weights, const T *bias, T *out, int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights,
-                   int fixed_point_position)
-{
-    const int half_width_weights  = width_weights / 2;
-    const int half_height_weights = height_weights / 2;
-
-    using namespace fixed_point_arithmetic;
-    using promoted_type = fixed_point_arithmetic::traits::promote_t<T>;
-
-    // Reset accumulator
-    fixed_point<promoted_type> acc(0, fixed_point_position);
-
-    // Compute a 2D convolution for each IFM and accumulate the result
-    for(int ifm = 0; ifm < depth_in; ++ifm)
-    {
-        // Compute the offset for the input slice
-        const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in;
-
-        // Compute 2D convolution
-        for(int yk = -half_height_weights; yk <= half_height_weights; ++yk)
-        {
-            for(int xk = -half_width_weights; xk <= half_width_weights; ++xk)
-            {
-                // Check if the pixel is out-of-bound
-                if(is_valid_pixel(xi + xk, 0, width_in) && is_valid_pixel(yi + yk, 0, height_in))
-                {
-                    const int idx = xk + half_width_weights;
-                    const int idy = yk + half_height_weights;
-
-                    const fixed_point<promoted_type> i_value(in[offset_slice_in + xk + yk * width_in], fixed_point_position, true);
-                    const fixed_point<promoted_type> w_value(weights[idx + idy * width_weights + ifm * width_weights * height_weights], fixed_point_position, true);
-                    const fixed_point<promoted_type> iw = i_value * w_value;
-                    acc                                 = iw + acc;
-                }
-            }
-        }
-    }
-
-    // Get the bias
-    const fixed_point<promoted_type> b(*bias, fixed_point_position, true);
-
-    // Accumulate the bias and convert back
-    acc = acc + b;
-    fixed_point<T> res(acc);
-    *out = res.raw();
-}
-} // namespace
-
-template <typename T>
-SimpleTensor<T> convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<T> &bias, const TensorShape &output_shape, const PadStrideInfo &info)
-{
-    // Create reference
-    SimpleTensor<T> dst{ output_shape, src.data_type(), 1, src.fixed_point_position() };
-
-    // Compute reference
-    const int width_in       = src.shape().x();
-    const int height_in      = src.shape().y();
-    const int depth_in       = src.shape().z();
-    const int width_out      = dst.shape().x();
-    const int height_out     = dst.shape().y();
-    const int depth_out      = dst.shape().z();
-    const int width_weights  = weights.shape().x();
-    const int height_weights = weights.shape().y();
-    const int depth_weights  = weights.shape().z();
-    const int pad_xi         = std::min(static_cast<int>(info.pad().first), width_weights / 2);
-    const int pad_yi         = std::min(static_cast<int>(info.pad().second), height_weights / 2);
-    const int start_xi       = width_weights / 2 - pad_xi;
-    const int start_yi       = height_weights / 2 - pad_yi;
-    const int end_xi         = width_in - start_xi;
-    const int end_yi         = height_in - start_yi;
-    const int stride_xi      = info.stride().first;
-    const int stride_yi      = info.stride().second;
-    const int num_batches    = src.shape().total_size() / (width_in * height_in * depth_in);
-
-    for(int r = 0; r < num_batches; ++r)
-    {
-        for(int yi = start_yi; yi < end_yi; yi += stride_yi)
-        {
-            for(int xi = start_xi; xi < end_xi; xi += stride_xi)
-            {
-                for(int ofm = 0; ofm < depth_out; ++ofm)
-                {
-                    // Compute input and output offsets
-                    const int offset_in  = r * width_in * height_in * depth_in;
-                    const int xo         = (xi - start_xi) / stride_xi;
-                    const int yo         = (yi - start_yi) / stride_yi;
-                    const int offset_out = xo + yo * width_out + ofm * width_out * height_out + r * width_out * height_out * depth_out;
-
-                    // Compute 3D convolution
-                    convolution3d(src.data() + offset_in,
-                                  weights.data() + ofm * width_weights * height_weights * depth_weights,
-                                  bias.data() + ofm,
-                                  dst.data() + offset_out,
-                                  xi, yi,
-                                  width_in, height_in, depth_in,
-                                  width_weights, height_weights,
-                                  src.fixed_point_position());
-                }
-            }
-        }
-    }
-
-    return dst;
-}
-
-template SimpleTensor<float> convolution_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, const TensorShape &output_shape,
-                                               const PadStrideInfo &info);
-template SimpleTensor<half> convolution_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, const TensorShape &output_shape,
-                                              const PadStrideInfo &info);
-template SimpleTensor<qint8_t> convolution_layer(const SimpleTensor<qint8_t> &src, const SimpleTensor<qint8_t> &weights, const SimpleTensor<qint8_t> &bias, const TensorShape &output_shape,
-                                                 const PadStrideInfo &info);
-template SimpleTensor<qint16_t> convolution_layer(const SimpleTensor<qint16_t> &src, const SimpleTensor<qint16_t> &weights, const SimpleTensor<qint16_t> &bias, const TensorShape &output_shape,
-                                                  const PadStrideInfo &info);
-} // namespace reference
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CPP/DepthwiseConvolution.cpp b/tests/validation/CPP/DepthwiseConvolution.cpp
deleted file mode 100644
index ae54494..0000000
--- a/tests/validation/CPP/DepthwiseConvolution.cpp
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "DepthwiseConvolution.h"
-
-#include "ConvolutionLayer.h"
-#include "Utils.h"
-
-#include "tests/validation/Helpers.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace reference
-{
-/** Perform a depthwise convolution
- *
- * - Three-dimensional tensors
- * - Third dimension is the number of channels
- * - Depths of the input tensor and filter are equal
- * - Padding, stride and output shape "match"
- *
- */
-template <typename T>
-SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const TensorShape &dst_shape, const PadStrideInfo &conv_info)
-{
-    // Create reference
-    SimpleTensor<T> dst{ dst_shape, src.data_type(), 1, src.fixed_point_position() };
-
-    // Compute reference
-    const size_t filter_width  = weights.shape().x();
-    const size_t filter_height = weights.shape().y();
-    const size_t filter_plane  = filter_width * filter_height;
-    const size_t input_width   = src.shape().x();
-    const size_t input_height  = src.shape().y();
-    const size_t input_depth   = src.shape().z();
-    const int    num_batches   = src.shape().total_size() / (input_width * input_height * input_depth);
-
-    const size_t filter_half_width  = filter_width / 2;
-    const size_t filter_half_height = filter_height / 2;
-    const size_t pad_x              = std::min(filter_half_width, static_cast<size_t>(conv_info.pad().first));
-    const size_t pad_y              = std::min(filter_half_height, static_cast<size_t>(conv_info.pad().second));
-    const size_t minimum_x          = -pad_x + filter_half_width;
-    const size_t minimum_y          = -pad_y + filter_half_height;
-
-    int out_pos = 0;
-    for(int r = 0; r < num_batches; ++r)
-    {
-        for(size_t z = 0; z < input_depth; ++z)
-        {
-            for(size_t y = minimum_y; y < input_height - minimum_y; y += conv_info.stride().second)
-            {
-                for(size_t x = minimum_x; x < input_width - minimum_x; x += conv_info.stride().first)
-                {
-                    Coordinates coords(static_cast<int>(x), static_cast<int>(y), static_cast<int>(z), static_cast<int>(r));
-                    size_t      filter_offset = filter_plane * z;
-
-                    T val = 0;
-                    for(int j = y - filter_half_height; j <= static_cast<int>(y + filter_half_height); ++j)
-                    {
-                        for(int i = x - filter_half_width; i <= static_cast<int>(x + filter_half_width); ++i)
-                        {
-                            coords.set(0, i);
-                            coords.set(1, j);
-                            val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, 0.f);
-                            ++filter_offset;
-                        }
-                    }
-                    coords.set(0, x);
-                    coords.set(1, y);
-                    dst[out_pos++] = saturate_cast<T>(val);
-                }
-            }
-        }
-    }
-
-    return dst;
-}
-
-template SimpleTensor<float> depthwise_convolution(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const TensorShape &dst_shape, const PadStrideInfo &conv_info);
-} // namespace reference
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CPP/FullyConnectedLayer.cpp b/tests/validation/CPP/FullyConnectedLayer.cpp
deleted file mode 100644
index 2b32c4b..0000000
--- a/tests/validation/CPP/FullyConnectedLayer.cpp
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "FullyConnectedLayer.h"
-
-#include "arm_compute/core/Types.h"
-#include "tests/validation/FixedPoint.h"
-
-#include <numeric>
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace reference
-{
-namespace
-{
-// Vector matrix multiply for floating point
-template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0>
-void vector_matrix_multiply(const T *src, const T *weights, const T *bias, T *dst, int cols_weights, int rows_weights, uint8_t fixed_point_position)
-{
-    ARM_COMPUTE_UNUSED(fixed_point_position);
-
-    for(int y = 0; y < rows_weights; ++y)
-    {
-        dst[y] = std::inner_product(src, src + cols_weights, weights, static_cast<T>(0)) + bias[y];
-        weights += cols_weights;
-    }
-}
-
-// Vector matrix multiply for fixed point type
-template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
-void vector_matrix_multiply(const T *src, const T *weights, const T *bias, T *dst, int cols_weights, int rows_weights, uint8_t fixed_point_position)
-{
-    using namespace fixed_point_arithmetic;
-    using promoted_type = fixed_point_arithmetic::traits::promote_t<T>;
-
-    for(int y = 0; y < rows_weights; ++y)
-    {
-        // Reset accumulator
-        fixed_point<promoted_type> acc(0, fixed_point_position);
-
-        for(int x = 0; x < cols_weights; ++x)
-        {
-            const fixed_point<promoted_type> i_value(src[x], fixed_point_position, true);
-            const fixed_point<promoted_type> w_value(weights[x], fixed_point_position, true);
-            acc = acc + i_value * w_value;
-        }
-
-        // Get the bias
-        const fixed_point<T> b(bias[y], fixed_point_position, true);
-
-        // Convert back and accumulate the bias
-        fixed_point<T> res(acc);
-        res = res + b;
-
-        // Store the result
-        dst[y] = res.raw();
-
-        weights += cols_weights;
-    }
-}
-} // namespace
-
-template <typename T>
-SimpleTensor<T> fully_connected_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<T> &bias, const TensorShape &dst_shape)
-{
-    // Create reference
-    SimpleTensor<T> dst{ TensorShape{ dst_shape }, src.data_type(), 1, src.fixed_point_position() };
-
-    // Sanity checks
-    const int          num_batch_dimensions = std::max(0, static_cast<int>(dst_shape.num_dimensions()) - 1);
-    const int          num_input_dimensions = src.shape().num_dimensions() - num_batch_dimensions;
-    const unsigned int linear_input_size    = src.shape().total_size_lower(num_input_dimensions);
-
-    ARM_COMPUTE_UNUSED(num_batch_dimensions);
-    ARM_COMPUTE_UNUSED(num_input_dimensions);
-    ARM_COMPUTE_UNUSED(linear_input_size);
-    ARM_COMPUTE_ERROR_ON(weights.shape().x() != linear_input_size);
-    ARM_COMPUTE_ERROR_ON(weights.shape().y() != bias.shape().x());
-    ARM_COMPUTE_ERROR_ON(weights.shape().y() != dst.shape().x());
-
-    // Compute reference
-    const int cols_weights = weights.shape().x();
-    const int rows_weights = weights.shape().y();
-    const int num_batches  = dst_shape.total_size_upper(1);
-
-    for(int k = 0; k < num_batches; ++k)
-    {
-        vector_matrix_multiply<T>(src.data() + k * cols_weights,
-                                  weights.data(),
-                                  bias.data(),
-                                  dst.data() + k * rows_weights,
-                                  cols_weights,
-                                  rows_weights,
-                                  src.fixed_point_position());
-    }
-
-    return dst;
-}
-
-template SimpleTensor<float> fully_connected_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, const TensorShape &dst_shape);
-template SimpleTensor<half> fully_connected_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, const TensorShape &dst_shape);
-template SimpleTensor<qint8_t> fully_connected_layer(const SimpleTensor<qint8_t> &src, const SimpleTensor<qint8_t> &weights, const SimpleTensor<qint8_t> &bias, const TensorShape &dst_shape);
-template SimpleTensor<qint16_t> fully_connected_layer(const SimpleTensor<qint16_t> &src, const SimpleTensor<qint16_t> &weights, const SimpleTensor<qint16_t> &bias, const TensorShape &dst_shape);
-} // namespace reference
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CPP/GEMMLowp.cpp b/tests/validation/CPP/GEMMLowp.cpp
deleted file mode 100644
index d172a77..0000000
--- a/tests/validation/CPP/GEMMLowp.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "GEMM.h"
-
-#include "arm_compute/core/Types.h"
-#include "tests/validation/FixedPoint.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace reference
-{
-template <typename T>
-SimpleTensor<T> gemmlowp(const SimpleTensor<T> &a, const SimpleTensor<T> &b, SimpleTensor<T> &c,
-                         int32_t a_offset, int32_t b_offset, int32_t c_offset, int32_t c_mult_int, int32_t out_shift)
-{
-    const int            K       = a.shape().x();
-    const int            b_width = b.shape().x();
-    const int            rows    = c.shape().y(); //M
-    const int            cols    = c.shape().x(); //N
-    std::vector<int32_t> acc;
-    acc.resize(cols);
-    for(int i = 0; i < rows; ++i)
-    {
-        for(int j = 0; j < cols; ++j)
-        {
-            acc[j] = 0;
-        }
-        for(int k = 0; k < K; ++k)
-        {
-            const int32_t tmp_a = a_offset + static_cast<int32_t>(a[k + i * K]);
-            for(int j = 0; j < b_width; ++j)
-            {
-                const int32_t tmp_b       = b_offset + static_cast<int32_t>(b[j + k * b_width]);
-                const int32_t mult_as_int = tmp_a * tmp_b;
-                acc[j] += mult_as_int;
-            }
-        }
-        for(int j = 0; j < cols; ++j)
-        {
-            const int32_t result = ((c_offset + acc[j]) * c_mult_int) >> out_shift;
-            c[j + i * cols]      = static_cast<uint8_t>(std::min(255, std::max(0, result)));
-        }
-    }
-
-    return c;
-}
-
-template SimpleTensor<uint8_t> gemmlowp(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, SimpleTensor<uint8_t> &c,
-                                        int32_t a_offset, int32_t b_offset, int32_t c_offset, int32_t c_mult_int, int32_t out_shift);
-} // namespace reference
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CPP/GEMMLowp.h b/tests/validation/CPP/GEMMLowp.h
deleted file mode 100644
index 2160975..0000000
--- a/tests/validation/CPP/GEMMLowp.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_TEST_GEMMLOWP_H__
-#define __ARM_COMPUTE_TEST_GEMMLOWP_H__
-
-#include "tests/SimpleTensor.h"
-#include "tests/validation/Helpers.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace reference
-{
-template <typename T>
-SimpleTensor<T> gemmlowp(const SimpleTensor<T> &a, const SimpleTensor<T> &b, SimpleTensor<T> &c,
-                         int32_t a_offset, int32_t b_offset, int32_t c_offset, int32_t c_mult_int, int32_t out_shift);
-} // namespace reference
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_GEMMLOWP_H__ */
diff --git a/tests/validation/CPP/Permute.cpp b/tests/validation/CPP/Permute.cpp
new file mode 100644
index 0000000..7f27f3c
--- /dev/null
+++ b/tests/validation/CPP/Permute.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CPP/functions/CPPPermute.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/PermuteFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+const auto PermuteParametersSmall = combine(concat(datasets::Small3DShapes(), datasets::Small4DShapes()),
+                                            framework::dataset::make("PermutationVector", { PermutationVector(2U, 0U, 1U), PermutationVector(1U, 2U, 0U) }));
+const auto PermuteParametersLarge = combine(concat(datasets::Large3DShapes(), datasets::Large4DShapes()),
+                                            framework::dataset::make("PermutationVector", { PermutationVector(2U, 0U, 1U), PermutationVector(1U, 2U, 0U) }));
+} // namespace
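+// (Note: both permutation vectors above are 3-cycles over the first three dimensions (e.g. W/H/C);
+//  the fourth dimension of the 4D shapes, typically the batch, is not reordered.)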
+TEST_SUITE(CPP)
+TEST_SUITE(Permute)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::Small4DShapes(), framework::dataset::make("DataType", { DataType::S8, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, DataType::F32 })),
+               shape, data_type)
+{
+    // Define permutation vector
+    const PermutationVector perm(2U, 0U, 1U);
+
+    // Permute shapes
+    TensorShape output_shape = shape;
+    permute(output_shape, perm);
+
+    // Create tensors
+    Tensor ref_src = create_tensor<Tensor>(shape, data_type);
+    Tensor dst     = create_tensor<Tensor>(output_shape, data_type);
+
+    // Create and Configure function
+    CPPPermute perm_func;
+    perm_func.configure(&ref_src, &dst, perm);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(output_shape);
+    validate(dst.info()->valid_region(), valid_region);
+}
+
+template <typename T>
+using CPPPermuteFixture = PermuteValidationFixture<Tensor, Accessor, CPPPermute, T>;
+
+TEST_SUITE(U8)
+FIXTURE_DATA_TEST_CASE(RunSmall, CPPPermuteFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(PermuteParametersSmall, framework::dataset::make("DataType", DataType::U8)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CPPPermuteFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(PermuteParametersLarge, framework::dataset::make("DataType", DataType::U8)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(U16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CPPPermuteFixture<uint16_t>, framework::DatasetMode::PRECOMMIT, combine(PermuteParametersSmall, framework::dataset::make("DataType", DataType::U16)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CPPPermuteFixture<uint16_t>, framework::DatasetMode::NIGHTLY, combine(PermuteParametersLarge, framework::dataset::make("DataType", DataType::U16)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(U32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CPPPermuteFixture<uint32_t>, framework::DatasetMode::PRECOMMIT, combine(PermuteParametersSmall, framework::dataset::make("DataType", DataType::U32)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CPPPermuteFixture<uint32_t>, framework::DatasetMode::NIGHTLY, combine(PermuteParametersLarge, framework::dataset::make("DataType", DataType::U32)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/GLES_COMPUTE/ActivationLayer.cpp b/tests/validation/GLES_COMPUTE/ActivationLayer.cpp
new file mode 100644
index 0000000..23821d3
--- /dev/null
+++ b/tests/validation/GLES_COMPUTE/ActivationLayer.cpp
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ActivationFunctionsDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/ActivationLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Define tolerance of the activation layer.
+ *
+ * @param[in] activation The activation function used.
+ * @param[in] data_type  Data type.
+ *
+ * @return Tolerance depending on the activation function.
+ */
+AbsoluteTolerance<float> tolerance(ActivationLayerInfo::ActivationFunction activation, DataType data_type)
+{
+    constexpr float epsilon = 1e-6f;
+
+    switch(activation)
+    {
+        case ActivationLayerInfo::ActivationFunction::LINEAR:
+            return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.2f : epsilon);
+        case ActivationLayerInfo::ActivationFunction::SQUARE:
+            return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.1f : epsilon);
+        case ActivationLayerInfo::ActivationFunction::LOGISTIC:
+            if(is_data_type_fixed_point(data_type))
+            {
+                return AbsoluteTolerance<float>(5.f);
+            }
+            else
+            {
+                return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.001f : epsilon);
+            }
+        case ActivationLayerInfo::ActivationFunction::LEAKY_RELU:
+            return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.00001f : epsilon);
+        case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
+        case ActivationLayerInfo::ActivationFunction::SQRT:
+            if(is_data_type_fixed_point(data_type))
+            {
+                return AbsoluteTolerance<float>(5.f);
+            }
+            else
+            {
+                return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.01f : 0.00001f);
+            }
+        case ActivationLayerInfo::ActivationFunction::TANH:
+            if(is_data_type_fixed_point(data_type))
+            {
+                return AbsoluteTolerance<float>(5.f);
+            }
+            else
+            {
+                return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.001f : 0.00001f);
+            }
+        default:
+            return AbsoluteTolerance<float>(epsilon);
+    }
+}
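+// (The larger F16 tolerances above presumably reflect half precision's ~10-bit mantissa; the exact values look empirical.)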
+
+/** CNN data types */
+const auto CNNDataTypes = framework::dataset::make("DataType",
+{
+    DataType::F16,
+    DataType::F32,
+});
+
+/** Input data sets. */
+const auto ActivationDataset = combine(combine(framework::dataset::make("InPlace", { false, true }), datasets::ActivationFunctions()), framework::dataset::make("AlphaBeta", { 0.5f, 1.f }));
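+// (combine() forms the cartesian product, so every shape is paired with each in-place flag, activation function and alpha/beta value.)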
+} // namespace
+
+TEST_SUITE(GC)
+TEST_SUITE(ActivationLayer)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), CNNDataTypes), framework::dataset::make("InPlace", { false, true })),
+               shape, data_type, in_place)
+{
+    // Fixed point position is not used for the floating point data types tested here
+    const int fixed_point_position = 0;
+
+    // Create tensors
+    GCTensor src = create_tensor<GCTensor>(shape, data_type, 1, fixed_point_position);
+    GCTensor dst = create_tensor<GCTensor>(shape, data_type, 1, fixed_point_position);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    GCActivationLayer act_layer;
+
+    if(in_place)
+    {
+        act_layer.configure(&src, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS));
+    }
+    else
+    {
+        act_layer.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS));
+    }
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(src.info()->valid_region(), valid_region);
+
+    if(!in_place)
+    {
+        validate(dst.info()->valid_region(), valid_region);
+    }
+
+    // Validate padding
+    const int         step    = (arm_compute::data_size_from_type(data_type) == 4 ? 1 : 2);
+    const PaddingSize padding = PaddingCalculator(shape.x(), step).required_padding();
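+    // (Assumption: the kernel steps over one 32-bit element at a time, i.e. one F32 value or two packed F16 values,
+    //  which is why the required padding is computed with that element step.)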
+    validate(src.info()->padding(), padding);
+
+    if(!in_place)
+    {
+        validate(dst.info()->padding(), padding);
+    }
+}
+
+template <typename T>
+using GCActivationLayerFixture = ActivationValidationFixture<GCTensor, GCAccessor, GCActivationLayer, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, GCActivationLayerFixture<half_float::half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ActivationDataset),
+                                                                                                                        framework::dataset::make("DataType",
+                                                                                                                                DataType::F16)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance(_function, _data_type));
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, GCActivationLayerFixture<half_float::half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ActivationDataset),
+                                                                                                                      framework::dataset::make("DataType",
+                                                                                                                              DataType::F16)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance(_function, _data_type));
+}
+TEST_SUITE_END()
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, GCActivationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ActivationDataset), framework::dataset::make("DataType",
+                                                                                                             DataType::F32)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance(_function, _data_type));
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, GCActivationLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ActivationDataset), framework::dataset::make("DataType",
+                                                                                                           DataType::F32)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance(_function, _data_type));
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp b/tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp
new file mode 100644
index 0000000..a82149b
--- /dev/null
+++ b/tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/RandomBatchNormalizationLayerDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/BatchNormalizationLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr AbsoluteTolerance<float> tolerance_f(0.00001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
+constexpr AbsoluteTolerance<float> tolerance_f16(0.01f);  /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
+} // namespace
+
+TEST_SUITE(GC)
+TEST_SUITE(BatchNormalizationLayer)
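+// (For reference: batch normalization computes y = gamma * (x - mean) / sqrt(var + epsilon) + beta per channel,
+//  so mean, var, beta and gamma below each hold one value per feature map.)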
+
+template <typename T>
+using GCBatchNormalizationLayerFixture = BatchNormalizationLayerValidationFixture<GCTensor, GCAccessor, GCBatchNormalizationLayer, T>;
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::RandomBatchNormalizationLayerDataset(), framework::dataset::make("DataType", { DataType::F32 })),
+               shape0, shape1, epsilon, dt)
+{
+    // Set the fixed point position if the data type is a fixed point type
+    int fixed_point_position = (arm_compute::is_data_type_fixed_point(dt)) ? 3 : 0;
+
+    // Create tensors
+    GCTensor src   = create_tensor<GCTensor>(shape0, dt, 1, fixed_point_position);
+    GCTensor dst   = create_tensor<GCTensor>(shape0, dt, 1, fixed_point_position);
+    GCTensor mean  = create_tensor<GCTensor>(shape1, dt, 1, fixed_point_position);
+    GCTensor var   = create_tensor<GCTensor>(shape1, dt, 1, fixed_point_position);
+    GCTensor beta  = create_tensor<GCTensor>(shape1, dt, 1, fixed_point_position);
+    GCTensor gamma = create_tensor<GCTensor>(shape1, dt, 1, fixed_point_position);
+
+    // Create and Configure function
+    GCBatchNormalizationLayer norm;
+    norm.configure(&src, &dst, &mean, &var, &beta, &gamma, epsilon);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape0);
+    validate(dst.info()->valid_region(), valid_region);
+}
+
+TEST_SUITE(Float)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(Random, GCBatchNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(),
+                                                                                                                  framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance_f16, 0);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(Random, GCBatchNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(),
+                                                                                                                   framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance_f, 0);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp b/tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp
new file mode 100644
index 0000000..1d667f7
--- /dev/null
+++ b/tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DepthConcatenateLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(GC)
+TEST_SUITE(DepthConcatenateLayer)
+
+template <typename T>
+using GCDepthConcatenateLayerFixture = DepthConcatenateLayerValidationFixture<GCTensor, IGCTensor, GCAccessor, GCDepthConcatenateLayer, T>;
+
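+// The outputs below are validated without a tolerance: depth concatenation only copies data, so a bit-exact match with the reference is expected.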
+TEST_SUITE(Float)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthConcatenateLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
+                                                                                                                  DataType::F16)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthConcatenateLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
+                                                                                                                DataType::F16)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::F32)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthConcatenateLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::F32)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/GLES_COMPUTE/DirectConvolutionLayer.cpp b/tests/validation/GLES_COMPUTE/DirectConvolutionLayer.cpp
new file mode 100644
index 0000000..eb3d307
--- /dev/null
+++ b/tests/validation/GLES_COMPUTE/DirectConvolutionLayer.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DirectConvolutionLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+RelativeTolerance<half>  tolerance_fp16(half(0.2)); /**< Tolerance for floating point tests */
+RelativeTolerance<float> tolerance_fp32(0.02f);     /**< Tolerance for floating point tests */
+constexpr float          tolerance_num = 0.07f;     /**< Allowed fraction of mismatching elements */
+
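+// Note: the nested combine()/concat() calls below build the cartesian product of shapes, strides,
+// pad/kernel-size pairs and kernel counts. The two-argument make("Name", a, b) overload is assumed
+// to generate the integer range [a, b).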
+/** Direct convolution data set. */
+const auto data_quantized = combine(datasets::SmallDirectConvolutionShapes(),
+                                    combine(framework::dataset::make("StrideX", 1, 3),
+                                            combine(framework::dataset::make("StrideY", 1, 3),
+                                                    combine(concat(combine(framework::dataset::make("PadX", 0),
+                                                                           combine(framework::dataset::make("PadY", 0),
+                                                                                   framework::dataset::make("KernelSize", 1))),
+                                                                   combine(framework::dataset::make("PadX", 0, 2),
+                                                                           combine(framework::dataset::make("PadY", 0, 2),
+                                                                                   framework::dataset::make("KernelSize", { 3 })))),
+                                                            framework::dataset::make("NumKernels", { 1, 4, 8, 16 })))));
+
+const auto data = combine(datasets::SmallDirectConvolutionShapes(),
+                          combine(framework::dataset::make("StrideX", 1, 3),
+                                  combine(framework::dataset::make("StrideY", 1, 3),
+                                          combine(concat(combine(framework::dataset::make("PadX", 0),
+                                                                 combine(framework::dataset::make("PadY", 0),
+                                                                         framework::dataset::make("KernelSize", 1))),
+                                                         combine(framework::dataset::make("PadX", 0, 2),
+                                                                 combine(framework::dataset::make("PadY", 0, 2),
+                                                                         framework::dataset::make("KernelSize", { 3, 5 })))),
+                                                  framework::dataset::make("NumKernels", { 1, 4, 8, 16 })))));
+} // namespace
+
+TEST_SUITE(GC)
+TEST_SUITE(DirectConvolutionLayer)
+
+template <typename T>
+using GCDirectConvolutionLayerFixture = DirectConvolutionValidationFixture<GCTensor, GCAccessor, GCDirectConvolutionLayer, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(Run, GCDirectConvolutionLayerFixture<half_float::half>, framework::DatasetMode::ALL, combine(data, framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance_fp16, tolerance_num);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(Run, GCDirectConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(data, framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/GLES_COMPUTE/FullyConnectedLayer.cpp b/tests/validation/GLES_COMPUTE/FullyConnectedLayer.cpp
new file mode 100644
index 0000000..4040f46
--- /dev/null
+++ b/tests/validation/GLES_COMPUTE/FullyConnectedLayer.cpp
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/GLES_COMPUTE/Helper.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/FullyConnectedLayerDataset.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/FullyConnectedLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Tolerance for float operations */
+RelativeTolerance<float>            tolerance_f32(0.05f);
+RelativeTolerance<half_float::half> tolerance_f16(half(0.2));
+constexpr float                     tolerance_num = 0.07f; /**< Allowed fraction of mismatching elements */
+
+/** CNN data types */
+const auto CNNDataTypes = framework::dataset::make("DataType",
+{
+    DataType::F16,
+    DataType::F32,
+});
+
+const auto FullyConnectedParameters = combine(framework::dataset::make("TransposeWeights", { false, true }), framework::dataset::make("ReshapeWeights", { false, true }));
+} // namespace
+
+TEST_SUITE(GC)
+TEST_SUITE(FullyConnectedLayer)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallFullyConnectedLayerDataset(), datasets::LargeFullyConnectedLayerDataset()),
+                                                                           FullyConnectedParameters),
+                                                                   CNNDataTypes),
+               src_shape, weights_shape, bias_shape, dst_shape, transpose_weights, reshape_weights, data_type)
+{
+    // Set the fixed point position if the data type allows it
+    int fixed_point_position = is_data_type_fixed_point(data_type) ? 3 : 0;
+
+    TensorShape ws(weights_shape);
+
+    // Transpose weights if not done in the function
+    if(!reshape_weights || !transpose_weights)
+    {
+        const size_t shape_x = ws.x();
+        ws.set(0, ws.y());
+        ws.set(1, shape_x);
+    }
+
+    // Create tensors
+    GCTensor src     = create_tensor<GCTensor>(src_shape, data_type, 1, fixed_point_position);
+    GCTensor weights = create_tensor<GCTensor>(ws, data_type, 1, fixed_point_position);
+    GCTensor bias    = create_tensor<GCTensor>(bias_shape, data_type, 1, fixed_point_position);
+    GCTensor dst     = create_tensor<GCTensor>(dst_shape, data_type, 1, fixed_point_position);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function.
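+    // The final configure() argument is assumed to be a "weights already reshaped" flag, which is why !reshape_weights is passed here.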
+    GCFullyConnectedLayer fc;
+    fc.configure(&src, &weights, &bias, &dst, transpose_weights, !reshape_weights);
+
+    // Validate valid region
+    const ValidRegion dst_valid_region = shape_to_valid_region(dst_shape);
+    validate(dst.info()->valid_region(), dst_valid_region);
+}
+
+template <typename T>
+using GCFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<GCTensor, GCAccessor, GCFullyConnectedLayer, T, false>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, GCFullyConnectedLayerFixture<half_float::half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(),
+                       FullyConnectedParameters),
+                       framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, GCFullyConnectedLayerFixture<half_float::half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(),
+                       FullyConnectedParameters),
+                       framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, GCFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters),
+                                                                                                                 framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, GCFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters),
+                                                                                                               framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/GLES_COMPUTE/GEMM.cpp b/tests/validation/GLES_COMPUTE/GEMM.cpp
new file mode 100644
index 0000000..fe6de2d
--- /dev/null
+++ b/tests/validation/GLES_COMPUTE/GEMM.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/GLES_COMPUTE/Helper.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/LargeGEMMDataset.h"
+#include "tests/datasets/SmallGEMMDataset.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/GEMMFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
+
+/** CNN data types */
+const auto CNNDataTypes = framework::dataset::make("DataType",
+{
+    DataType::F32,
+});
+} // namespace
+
+TEST_SUITE(GC)
+TEST_SUITE(GEMM)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallGEMMDataset(), datasets::LargeGEMMDataset()), CNNDataTypes),
+               shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type)
+{
+    // Set the fixed point position if the data type allows it
+    const int fixed_point_position = is_data_type_fixed_point(data_type) ? 3 : 0;
+
+    // Create tensors
+    GCTensor a   = create_tensor<GCTensor>(shape_a, data_type, 1, fixed_point_position);
+    GCTensor b   = create_tensor<GCTensor>(shape_b, data_type, 1, fixed_point_position);
+    GCTensor c   = create_tensor<GCTensor>(shape_c, data_type, 1, fixed_point_position);
+    GCTensor dst = create_tensor<GCTensor>(output_shape, data_type, 1, fixed_point_position);
+
+    ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    GCGEMM gemm;
+    gemm.configure(&a, &b, &c, &dst, alpha, beta);
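+    // Only the configuration is exercised here; numerical output is covered by the fixture tests below.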
+}
+
+template <typename T>
+using GCGEMMFixture = GEMMValidationFixture<GCTensor, GCAccessor, GCGEMM, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, GCGEMMFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallGEMMDataset(), framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, GCGEMMFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMDataset(), framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/GLES_COMPUTE/GlobalPoolingLayer.cpp b/tests/validation/GLES_COMPUTE/GlobalPoolingLayer.cpp
new file mode 100644
index 0000000..88372ff
--- /dev/null
+++ b/tests/validation/GLES_COMPUTE/GlobalPoolingLayer.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/PoolingTypesDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/PoolingLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Input data set for float data types */
+const auto GlobalPoolingLayerDataset = combine(datasets::GlobalPoolingShapes(), datasets::PoolingTypes());
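+// Global pooling uses a pool size equal to the input's spatial dimensions, so the dataset only needs to vary shapes and pooling types.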
+
+/** Tolerance values for comparing the reference's output against the implementation's output */
+constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for FP32 types */
+constexpr AbsoluteTolerance<float> tolerance_f16(0.01f);  /**< Tolerance value for comparing reference's output against implementation's output for FP16 types */
+} // namespace
+
+TEST_SUITE(GC)
+TEST_SUITE(GlobalPoolingLayer)
+
+template <typename T>
+using GCGlobalPoolingLayerFixture = GlobalPoolingLayerValidationFixture<GCTensor, GCAccessor, GCPoolingLayer, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunGlobalPooling, GCGlobalPoolingLayerFixture<float>, framework::DatasetMode::ALL, combine(GlobalPoolingLayerDataset, framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunGlobalPooling, GCGlobalPoolingLayerFixture<half>, framework::DatasetMode::ALL, combine(GlobalPoolingLayerDataset, framework::dataset::make("DataType",
+                                                                                                                 DataType::F16)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance_f16);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/GLES_COMPUTE/NormalizationLayer.cpp b/tests/validation/GLES_COMPUTE/NormalizationLayer.cpp
new file mode 100644
index 0000000..88f23f3
--- /dev/null
+++ b/tests/validation/GLES_COMPUTE/NormalizationLayer.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/NormalizationTypesDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/NormalizationLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Tolerance for float operations */
+constexpr AbsoluteTolerance<float> tolerance_f32(0.00001f);
+
+/** Input data set. */
+const auto NormalizationDataset = combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("NormType", { NormType::IN_MAP_1D, NormType::CROSS_MAP })),
+                                                          framework::dataset::make("NormalizationSize", 3, 9, 2)),
+                                                  framework::dataset::make("Beta", { 0.5f, 1.f, 2.f })),
+                                          framework::dataset::make("IsScaled", { true, false }));
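+// The three-argument make("NormalizationSize", 3, 9, 2) overload is assumed to generate the odd sizes 3, 5 and 7.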
+} // namespace
+
+TEST_SUITE(GC)
+TEST_SUITE(NormalizationLayer)
+
+template <typename T>
+using GCNormalizationLayerFixture = NormalizationValidationFixture<GCTensor, GCAccessor, GCNormalizationLayer, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, GCNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, GCNormalizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp b/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp
new file mode 100644
index 0000000..2c28141
--- /dev/null
+++ b/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/SoftmaxLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Tolerance for float operations */
+RelativeTolerance<half>  tolerance_f16(half(0.2));
+RelativeTolerance<float> tolerance_f32(0.001f);
+
+/** CNN data types */
+const auto CNNDataTypes = framework::dataset::make("DataType",
+{
+    DataType::F16,
+    DataType::F32,
+});
+} // namespace
+
+TEST_SUITE(GC)
+TEST_SUITE(SoftmaxLayer)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::SoftmaxLayerSmallShapes(), datasets::SoftmaxLayerLargeShapes()), CNNDataTypes), shape, data_type)
+{
+    // Set the fixed point position if the data type allows it
+    const int fixed_point_position = is_data_type_fixed_point(data_type) ? 3 : 0;
+
+    // Create tensors
+    GCTensor src = create_tensor<GCTensor>(shape, data_type, 1, fixed_point_position);
+    GCTensor dst = create_tensor<GCTensor>(shape, data_type, 1, fixed_point_position);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    GCSoftmaxLayer smx_layer;
+    smx_layer.configure(&src, &dst);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(src.info()->valid_region(), valid_region);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
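+    // PaddingCalculator(shape.x(), 8) is assumed to model a kernel that processes 8 elements per step; required_padding() returns the padding needed to round the row length up to that step.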
+    const PaddingSize padding = PaddingCalculator(shape.x(), 8).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+template <typename T>
+using GCSoftmaxLayerFixture = SoftmaxValidationFixture<GCTensor, GCAccessor, GCSoftmaxLayer, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, GCSoftmaxLayerFixture<half_float::half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SoftmaxLayerSmallShapes(),
+                                                                                                                     framework::dataset::make("DataType", DataType::F16)),
+                                                                                                                     framework::dataset::make("Beta", 1.0f)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, GCSoftmaxLayerFixture<half_float::half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(),
+                                                                                                                   framework::dataset::make("DataType", DataType::F16)),
+                                                                                                                   framework::dataset::make("Beta", 1.0f)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance_f16);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, GCSoftmaxLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SoftmaxLayerSmallShapes(),
+                                                                                                                  framework::dataset::make("DataType", DataType::F32)),
+                                                                                                          framework::dataset::make("Beta", 1.0f)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, GCSoftmaxLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(),
+                                                                                                                framework::dataset::make("DataType", DataType::F32)),
+                                                                                                        framework::dataset::make("Beta", 1.0f)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/GLES_COMPUTE/Transpose.cpp b/tests/validation/GLES_COMPUTE/Transpose.cpp
new file mode 100644
index 0000000..f31a99a
--- /dev/null
+++ b/tests/validation/GLES_COMPUTE/Transpose.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCTranspose.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/TransposeFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(GC)
+TEST_SUITE(Transpose)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), framework::dataset::make("DataType", { DataType::F16, DataType::F32 })),
+               shape, data_type)
+{
+    // Make rows the columns of the original shape
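+    // (e.g. a 27x13 input produces a 13x27 output)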
+    TensorShape output_shape{ shape[1], shape[0] };
+
+    // Create tensors
+    GCTensor ref_src = create_tensor<GCTensor>(shape, data_type);
+    GCTensor dst     = create_tensor<GCTensor>(output_shape, data_type);
+
+    // Create and Configure function
+    GCTranspose trans;
+    trans.configure(&ref_src, &dst);
+
+    // Validate dst region
+    const ValidRegion valid_region = shape_to_valid_region(output_shape);
+    validate(dst.info()->valid_region(), valid_region);
+}
+
+template <typename T>
+using GCTransposeFixture = TransposeValidationFixture<GCTensor, GCAccessor, GCTranspose, T>;
+
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, GCTransposeFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, GCTransposeFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, GCTransposeFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, GCTransposeFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/Helpers.cpp b/tests/validation/Helpers.cpp
index 23ad62a..313b059 100644
--- a/tests/validation/Helpers.cpp
+++ b/tests/validation/Helpers.cpp
@@ -112,6 +112,27 @@
 
     return params;
 }
+
+SimpleTensor<float> convert_from_asymmetric(const SimpleTensor<uint8_t> &src)
+{
+    const QuantizationInfo &quantization_info = src.quantization_info();
+    SimpleTensor<float>     dst{ src.shape(), DataType::F32, 1, 0 };
+    for(int i = 0; i < src.num_elements(); ++i)
+    {
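+        // dequantize() is assumed to map q -> scale * (q - offset) using the tensor's QuantizationInfo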
+        dst[i] = quantization_info.dequantize(src[i]);
+    }
+    return dst;
+}
+
+SimpleTensor<uint8_t> convert_to_asymmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info)
+{
+    SimpleTensor<uint8_t> dst{ src.shape(), DataType::QASYMM8, 1, 0, quantization_info };
+    for(int i = 0; i < src.num_elements(); ++i)
+    {
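+        // quantize() is assumed to compute clamp(round(value / scale) + offset, 0, 255) with the given rounding policy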
+        dst[i] = quantization_info.quantize(src[i], RoundingPolicy::TO_NEAREST_UP);
+    }
+    return dst;
+}
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h
index eecf976..ab7dbd8 100644
--- a/tests/validation/Helpers.h
+++ b/tests/validation/Helpers.h
@@ -186,7 +186,7 @@
 template <typename T>
 std::pair<T, T> get_batchnormalization_layer_test_bounds(int fixed_point_position = 1)
 {
-    bool is_float = std::is_floating_point<T>::value;
+    const bool is_float = std::is_floating_point<T>::value;
     std::pair<T, T> bounds;
 
     // Set initial values
@@ -201,6 +201,23 @@
 
     return bounds;
 }
+
+/** Convert quantized simple tensor into float using tensor quantization information.
+ *
+ * @param[in] src Quantized tensor.
+ *
+ * @return Float tensor.
+ */
+SimpleTensor<float> convert_from_asymmetric(const SimpleTensor<uint8_t> &src);
+
+/** Convert a float simple tensor into a quantized tensor using the specified quantization information.
+ *
+ * @param[in] src               Float tensor.
+ * @param[in] quantization_info Quantization information.
+ *
+ * @return Quantized tensor.
+ */
+SimpleTensor<uint8_t> convert_to_asymmetric(const SimpleTensor<float> &src, const QuantizationInfo &quantization_info);
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/NEON/AbsoluteDifference.cpp b/tests/validation/NEON/AbsoluteDifference.cpp
new file mode 100644
index 0000000..74c5701
--- /dev/null
+++ b/tests/validation/NEON/AbsoluteDifference.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ConvertPolicyDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/AbsoluteDifferenceFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Input data sets **/
+const auto AbsoluteDifferenceU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8)), framework::dataset::make("DataType",
+                                                 DataType::U8));
+const auto AbsoluteDifferenceS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
+                                                  framework::dataset::make("DataType", DataType::S16));
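+// Each dataset yields (input1, input2, output) data type triples; the S16 set covers (U8, S16, S16) and (S16, S16, S16).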
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(AbsoluteDifference)
+
+template <typename T>
+using NEAbsoluteDifferenceFixture = AbsoluteDifferenceValidationFixture<Tensor, Accessor, NEAbsoluteDifference, T>;
+
+TEST_SUITE(U8)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), AbsoluteDifferenceU8Dataset),
+               shape, data_type0, data_type1, output_data_type)
+{
+    // Create tensors
+    Tensor ref_src1 = create_tensor<Tensor>(shape, data_type0);
+    Tensor ref_src2 = create_tensor<Tensor>(shape, data_type1);
+    Tensor dst      = create_tensor<Tensor>(shape, output_data_type);
+
+    // Create and Configure function
+    NEAbsoluteDifference abs_diff;
+    abs_diff.configure(&ref_src1, &ref_src2, &dst);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(ref_src1.info()->padding(), padding);
+    validate(ref_src2.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEAbsoluteDifferenceFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), AbsoluteDifferenceU8Dataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEAbsoluteDifferenceFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AbsoluteDifferenceU8Dataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(S16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), AbsoluteDifferenceS16Dataset),
+               shape, data_type0, data_type1, output_data_type)
+{
+    // Create tensors
+    Tensor ref_src1 = create_tensor<Tensor>(shape, data_type0);
+    Tensor ref_src2 = create_tensor<Tensor>(shape, data_type1);
+    Tensor dst      = create_tensor<Tensor>(shape, output_data_type);
+
+    // Create and Configure function
+    NEAbsoluteDifference abs_diff;
+    abs_diff.configure(&ref_src1, &ref_src2, &dst);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(ref_src1.info()->padding(), padding);
+    validate(ref_src2.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEAbsoluteDifferenceFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), AbsoluteDifferenceS16Dataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEAbsoluteDifferenceFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AbsoluteDifferenceS16Dataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/Accumulate.cpp b/tests/validation/NEON/Accumulate.cpp
new file mode 100644
index 0000000..cb6894a
--- /dev/null
+++ b/tests/validation/NEON/Accumulate.cpp
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEAccumulate.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ConvertPolicyDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/AccumulateFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Tolerance value for comparing reference's output against implementation's output */
+constexpr AbsoluteTolerance<float> tolerance(1.0f);
+/** Input data sets **/
+const auto AccumulateU8Dataset  = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8));
+const auto AccumulateS16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16));
+} // namespace
+TEST_SUITE(NEON)
+TEST_SUITE(Accumulate)
+
+TEST_SUITE(U8)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), AccumulateS16Dataset),
+               shape, data_type, output_data_type)
+{
+    // Create tensors
+    Tensor ref_src = create_tensor<Tensor>(shape, data_type);
+    Tensor dst     = create_tensor<Tensor>(shape, output_data_type);
+
+    // Create and Configure function
+    NEAccumulate accum;
+    accum.configure(&ref_src, &dst);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(ref_src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+template <typename T1>
+using NEAccumulateFixture = AccumulateValidationFixture<Tensor, Accessor, NEAccumulate, T1, int16_t>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEAccumulateFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), AccumulateS16Dataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEAccumulateFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AccumulateS16Dataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE(AccumulateWeighted)
+
+TEST_SUITE(U8)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), AccumulateU8Dataset),
+               shape, data_type, output_data_type)
+{
+    // Generate a random alpha value
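+    // NEAccumulateWeighted expects alpha in [0, 1], so the distribution is bounded to that range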
+    std::mt19937                     gen(library->seed());
+    std::uniform_real_distribution<> float_dist(0, 1);
+    const float                      alpha = float_dist(gen);
+
+    // Create tensors
+    Tensor ref_src = create_tensor<Tensor>(shape, data_type);
+    Tensor dst     = create_tensor<Tensor>(shape, output_data_type);
+
+    // Create and Configure function
+    NEAccumulateWeighted accum_weight;
+    accum_weight.configure(&ref_src, alpha, &dst);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(ref_src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+template <typename T1>
+using NEAccumulateWeightedFixture = AccumulateWeightedValidationFixture<Tensor, Accessor, NEAccumulateWeighted, T1, uint8_t>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEAccumulateWeightedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), AccumulateU8Dataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEAccumulateWeightedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AccumulateU8Dataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE(AccumulateSquared)
+
+TEST_SUITE(U8)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), AccumulateS16Dataset),
+               shape, data_type, output_data_type)
+{
+    // Generate a random shift value
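+    // NEAccumulateSquared expects a shift in [0, 15], hence the bounded integer distribution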
+    std::mt19937                            gen(library->seed());
+    std::uniform_int_distribution<uint32_t> int_dist(0, 15);
+    const uint32_t                          shift = int_dist(gen);
+
+    // Create tensors
+    Tensor ref_src = create_tensor<Tensor>(shape, data_type);
+    Tensor dst     = create_tensor<Tensor>(shape, output_data_type);
+
+    // Create and Configure function
+    NEAccumulateSquared accum_square;
+    accum_square.configure(&ref_src, shift, &dst);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(ref_src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+template <typename T1>
+using NEAccumulateSquaredFixture = AccumulateSquaredValidationFixture<Tensor, Accessor, NEAccumulateSquared, T1, int16_t>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEAccumulateSquaredFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), AccumulateS16Dataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEAccumulateSquaredFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AccumulateS16Dataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/ActivationLayer.cpp b/tests/validation/NEON/ActivationLayer.cpp
index f58f3a8..8a918b2 100644
--- a/tests/validation/NEON/ActivationLayer.cpp
+++ b/tests/validation/NEON/ActivationLayer.cpp
@@ -78,9 +78,9 @@
 /** CNN data types */
 const auto CNNDataTypes = framework::dataset::make("DataType",
 {
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
     DataType::F16,
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
     DataType::F32,
     DataType::QS8,
     DataType::QS16,
@@ -137,11 +137,41 @@
     }
 }
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+    framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),     // Mismatching data types
+                                            TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 2),  // Mismatching fixed point
+                                            TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                          }),
+    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16),
+                                            TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 3),
+                                            TensorInfo(),
+                                          })),
+    framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                                 ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                                 ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                                 ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                                 ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                                               })),
+    framework::dataset::make("Expected", { false, true, false, false, true })),
+    input_info, output_info, act_info, expected)
+{
+    bool is_valid = bool(NEActivationLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), act_info));
+    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
 template <typename T>
 using NEActivationLayerFixture = ActivationValidationFixture<Tensor, Accessor, NEActivationLayer, T>;
 
 TEST_SUITE(Float)
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ActivationDataset),
                                                                                                             framework::dataset::make("DataType",
@@ -158,7 +188,7 @@
     validate(Accessor(_target), _reference, tolerance(_data_type, _function));
 }
 TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ActivationDataset), framework::dataset::make("DataType",
@@ -179,7 +209,7 @@
 template <typename T>
 using NEActivationLayerFixedPointFixture = ActivationValidationFixedPointFixture<Tensor, Accessor, NEActivationLayer, T>;
 
-TEST_SUITE(Quantized)
+TEST_SUITE(FixedPoint)
 TEST_SUITE(QS8)
 // We test for fixed point precision [3,5] because [1,2] and [6,7] ranges cause
 // overflowing issues in most of the transcendentals functions.
@@ -222,6 +252,34 @@
 TEST_SUITE_END()
 TEST_SUITE_END()
 
+template <typename T>
+using NEActivationLayerQuantizedFixture = ActivationValidationQuantizedFixture<Tensor, Accessor, NEActivationLayer, T>;
+
+/** Input data sets. */
+const auto QuantizedActivationDataset = combine(combine(framework::dataset::make("InPlace", { false, true }), framework::dataset::make("ActivationFunction", { ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU })),
+                                                framework::dataset::make("AlphaBeta", { 0.5f, 1.f }));
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEActivationLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), QuantizedActivationDataset),
+                                                                                                                        framework::dataset::make("DataType",
+                                                                                                                                DataType::QASYMM8)),
+                                                                                                                        framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.1f, 128.0f) })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance(_data_type, _function));
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEActivationLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), QuantizedActivationDataset),
+                                                                                                                      framework::dataset::make("DataType",
+                                                                                                                              DataType::QASYMM8)),
+                                                                                                                      framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.1f, 128.0f) })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance(_data_type, _function));
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
 TEST_SUITE_END()
 TEST_SUITE_END()
 } // namespace validation
diff --git a/tests/validation/NEON/ArithmeticAddition.cpp b/tests/validation/NEON/ArithmeticAddition.cpp
index edc59a9..e20e8df 100644
--- a/tests/validation/NEON/ArithmeticAddition.cpp
+++ b/tests/validation/NEON/ArithmeticAddition.cpp
@@ -52,10 +52,10 @@
                                                   framework::dataset::make("DataType", DataType::QS8));
 const auto ArithmeticAdditionQS16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QS16), framework::dataset::make("DataType", DataType::QS16)),
                                                    framework::dataset::make("DataType", DataType::QS16));
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 const auto ArithmeticAdditionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
                                                    framework::dataset::make("DataType", DataType::F16));
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 const auto ArithmeticAdditionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)),
                                                    framework::dataset::make("DataType", DataType::F32));
 } // namespace
@@ -66,6 +66,41 @@
 template <typename T>
 using NEArithmeticAdditionFixture = ArithmeticAdditionValidationFixture<Tensor, Accessor, NEArithmeticAddition, T>;
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+               framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),      // Window shrink
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid data type combination
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),  // Mismatching fixed point
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                      }),
+               framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
+                                                       TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                     })),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                     })),
+               framework::dataset::make("Expected", { true, true, false, false, false, false, true })),
+               input1_info, input2_info, output_info, expected)
+{
+    ARM_COMPUTE_EXPECT(bool(NEArithmeticAddition::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), ConvertPolicy::WRAP)) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
 TEST_SUITE(U8)
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                shape, policy)
@@ -180,7 +215,7 @@
 TEST_SUITE_END()
 
 TEST_SUITE(Float)
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(F16)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ArithmeticAdditionFP16Dataset),
                                                                                                          framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
@@ -189,7 +224,7 @@
     validate(Accessor(_target), _reference);
 }
 TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 TEST_SUITE(F32)
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
diff --git a/tests/validation/NEON/ArithmeticSubtraction.cpp b/tests/validation/NEON/ArithmeticSubtraction.cpp
index 3d184f1..f5a5033 100644
--- a/tests/validation/NEON/ArithmeticSubtraction.cpp
+++ b/tests/validation/NEON/ArithmeticSubtraction.cpp
@@ -49,14 +49,20 @@
                                                                              DataType::U8));
 const auto ArithmeticSubtractionS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
                                                      framework::dataset::make("DataType", DataType::S16));
+const auto ArithmeticSubtractionU8U8S16Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8)),
+                                                         framework::dataset::make("DataType", DataType::S16));
+const auto ArithmeticSubtractionS16U8S16Dataset = combine(combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8)),
+                                                          framework::dataset::make("DataType", DataType::S16));
+const auto ArithmeticSubtractionU8S16S16Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16)),
+                                                          framework::dataset::make("DataType", DataType::S16));
 const auto ArithmeticSubtractionQS8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QS8), framework::dataset::make("DataType", DataType::QS8)),
                                                      framework::dataset::make("DataType", DataType::QS8));
 const auto ArithmeticSubtractionQS16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QS16), framework::dataset::make("DataType", DataType::QS16)),
                                                       framework::dataset::make("DataType", DataType::QS16));
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 const auto ArithmeticSubtractionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
                                                       framework::dataset::make("DataType", DataType::F16));
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 const auto ArithmeticSubtractionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)),
                                                       framework::dataset::make("DataType", DataType::F32));
 } // namespace
@@ -64,8 +70,43 @@
 TEST_SUITE(NEON)
 TEST_SUITE(ArithmeticSubtraction)
 
-template <typename T>
-using NEArithmeticSubtractionFixture = ArithmeticSubtractionValidationFixture<Tensor, Accessor, NEArithmeticSubtraction, T>;
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+               framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),      // Window shrink
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid data type combination
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),  // Mismatching fixed point
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                      }),
+               framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
+                                                       TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                     })),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                     })),
+               framework::dataset::make("Expected", { true, true, false, false, false, false, true })),
+               input1_info, input2_info, output_info, expected)
+{
+    ARM_COMPUTE_EXPECT(bool(NEArithmeticSubtraction::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), ConvertPolicy::WRAP)) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+template <typename T1, typename T2 = T1, typename T3 = T1>
+using NEArithmeticSubtractionFixture = ArithmeticSubtractionValidationFixture<Tensor, Accessor, NEArithmeticSubtraction, T1, T2, T3>;
 
 TEST_SUITE(U8)
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
@@ -99,14 +140,19 @@
 }
 TEST_SUITE_END()
 
+template <typename T1, typename T2 = T1>
+using NEArithmeticSubtractionToS16Fixture = NEArithmeticSubtractionFixture<T1, T2, int16_t>;
+
 TEST_SUITE(S16)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()),
+                                                                                   framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
+                                                                           framework::dataset::make("DataType", { DataType::U8, DataType::S16 })),
                                                                    framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-               shape, data_type, policy)
+               shape, data_type1, data_type2, policy)
 {
     // Create tensors
-    Tensor ref_src1 = create_tensor<Tensor>(shape, data_type);
-    Tensor ref_src2 = create_tensor<Tensor>(shape, DataType::S16);
+    Tensor ref_src1 = create_tensor<Tensor>(shape, data_type1);
+    Tensor ref_src2 = create_tensor<Tensor>(shape, data_type2);
     Tensor dst      = create_tensor<Tensor>(shape, DataType::S16);
 
     // Create and Configure function
@@ -124,27 +170,86 @@
     validate(dst.info()->padding(), padding);
 }
 
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticSubtractionS16Dataset),
-                                                                                                                     framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+TEST_SUITE(S16_S16_S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionToS16Fixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticSubtractionS16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ArithmeticSubtractionS16Dataset),
-                                                                                                                   framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionToS16Fixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ArithmeticSubtractionS16Dataset),
+                                                                                                                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
 TEST_SUITE_END()
 
-template <typename T>
-using NEArithmeticSubtractionFixedPointFixture = ArithmeticSubtractionValidationFixedPointFixture<Tensor, Accessor, NEArithmeticSubtraction, T>;
+TEST_SUITE(U8_U8_S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionToS16Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(),
+                       ArithmeticSubtractionU8U8S16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionToS16Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(),
+                                                                                                                        ArithmeticSubtractionU8U8S16Dataset),
+                                                                                                                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(S16_U8_S16)
+using NEAriSubS16U8ToS16Fixture = NEArithmeticSubtractionToS16Fixture<int16_t, uint8_t>;
+FIXTURE_DATA_TEST_CASE(RunSmall, NEAriSubS16U8ToS16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(),
+                                                                                                               ArithmeticSubtractionS16U8S16Dataset),
+                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEAriSubS16U8ToS16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(),
+                                                                                                             ArithmeticSubtractionS16U8S16Dataset),
+                                                                                                     framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(U8_S16_S16)
+using NEAriSubU8S16ToS16Fixture = NEArithmeticSubtractionToS16Fixture<uint8_t, int16_t>;
+FIXTURE_DATA_TEST_CASE(RunSmall, NEAriSubU8S16ToS16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(),
+                                                                                                               ArithmeticSubtractionU8S16S16Dataset),
+                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEAriSubU8S16ToS16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(),
+                                                                                                             ArithmeticSubtractionU8S16S16Dataset),
+                                                                                                     framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+template <typename T1, typename T2 = T1, typename T3 = T1>
+using NEArithmeticSubtractionFixedPointFixture = ArithmeticSubtractionValidationFixedPointFixture<Tensor, Accessor, NEArithmeticSubtraction, T1, T2, T3>;
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QS8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionQS8Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+                       ArithmeticSubtractionQS8Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        framework::dataset::make("FractionalBits", 1, 7)))
 {
@@ -152,7 +257,8 @@
     validate(Accessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticSubtractionQS8Dataset),
+FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+                       ArithmeticSubtractionQS8Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        framework::dataset::make("FractionalBits", 1, 7)))
 {
@@ -171,7 +277,8 @@
     validate(Accessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticSubtractionQS16Dataset),
+FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticSubtractionFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+                       ArithmeticSubtractionQS16Dataset),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        framework::dataset::make("FractionalBits", 1, 15)))
 {
@@ -182,7 +289,7 @@
 TEST_SUITE_END()
 
 TEST_SUITE(Float)
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ArithmeticSubtractionFP16Dataset),
                                                                                                             framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
@@ -191,7 +298,7 @@
     validate(Accessor(_target), _reference);
 }
 TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 TEST_SUITE(FP32)
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
diff --git a/tests/validation/NEON/BatchNormalizationLayer.cpp b/tests/validation/NEON/BatchNormalizationLayer.cpp
index 9ca26eb..dfa32bb 100644
--- a/tests/validation/NEON/BatchNormalizationLayer.cpp
+++ b/tests/validation/NEON/BatchNormalizationLayer.cpp
@@ -44,9 +44,9 @@
 namespace
 {
 constexpr AbsoluteTolerance<float> tolerance_f32(0.00001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 constexpr AbsoluteTolerance<float> tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
-#endif                                                   /* ARM_COMPUTE_ENABLE_FP16 */
+#endif                                                   /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 constexpr AbsoluteTolerance<float> tolerance_qs8(3.0f);  /**< Tolerance value for comparing reference's output against implementation's output for DataType::QS8 */
 constexpr AbsoluteTolerance<float> tolerance_qs16(6.0f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QS16 */
 } // namespace
@@ -61,7 +61,7 @@
                shape0, shape1, epsilon, dt)
 {
     // Set fixed point position data type allowed
-    int fixed_point_position = (arm_compute::is_data_type_fixed_point(dt)) ? 3 : 0;
+    const int fixed_point_position = (arm_compute::is_data_type_fixed_point(dt)) ? 3 : 0;
 
     // Create tensors
     Tensor src   = create_tensor<Tensor>(shape0, dt, 1, fixed_point_position);
@@ -80,6 +80,52 @@
     validate(dst.info()->valid_region(), valid_region);
 }
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Window shrink
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Mismatching data types
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Mismatching data types
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),    // Invalid mean/var/beta/gamma shape
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2), // Mismatching fixed point position
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                     }),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                       TensorInfo(),
+                                                     })),
+               framework::dataset::make("MVBGInfo",{ TensorInfo(TensorShape(2U), 1, DataType::F32),
+                                                     TensorInfo(TensorShape(2U), 1, DataType::F32),
+                                                     TensorInfo(TensorShape(2U), 1, DataType::F16),
+                                                     TensorInfo(TensorShape(2U), 1, DataType::F32),
+                                                     TensorInfo(TensorShape(5U), 1, DataType::F32),
+                                                     TensorInfo(TensorShape(2U), 1, DataType::QS8, 2),
+                                                     TensorInfo(TensorShape(2U), 1, DataType::QS8, 2),
+                                                     TensorInfo(TensorShape(2U), 1, DataType::QS8, 2),
+                                                   })),
+               framework::dataset::make("Expected", { true, false, false, false, false, false, true, true})),
+               input_info, output_info, mvbg_info, expected)
+{
+    const auto &mean_info = mvbg_info;
+    const auto &var_info = mvbg_info;
+    const auto &beta_info = mvbg_info;
+    const auto &gamma_info = mvbg_info;
+    bool has_error = bool(NEBatchNormalizationLayer::validate(
+            &input_info.clone()->set_is_resizable(false), output_info.total_size() ? &output_info.clone()->set_is_resizable(false) : nullptr,
+            &mean_info.clone()->set_is_resizable(false), &var_info.clone()->set_is_resizable(false),
+            &beta_info.clone()->set_is_resizable(false), &gamma_info.clone()->set_is_resizable(false), 1.f));
+    ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
 TEST_SUITE(Float)
 FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(),
                                                                                                                    framework::dataset::make("DataType", DataType::F32)))
@@ -89,7 +135,7 @@
 }
 TEST_SUITE_END()
 
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(Float16)
 FIXTURE_DATA_TEST_CASE(Random, NEBatchNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(),
                                                                                                                   framework::dataset::make("DataType", DataType::F16)))
@@ -98,7 +144,7 @@
     validate(Accessor(_target), _reference, tolerance_f16, 0);
 }
 TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 TEST_SUITE(Quantized)
 template <typename T>
diff --git a/tests/validation/NEON/Col2Im.cpp b/tests/validation/NEON/Col2Im.cpp
new file mode 100644
index 0000000..9125dc2
--- /dev/null
+++ b/tests/validation/NEON/Col2Im.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NECol2Im.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(NEON)
+TEST_SUITE(Col2Im)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(10U, 12U, 1U, 2U), 1, DataType::S64),    // Unsupported data type
+                                                       TensorInfo(TensorShape(10U, 12U, 1U, 2U), 1, DataType::F32),    // Mismatching data type
+                                                       TensorInfo(TensorShape(10U, 12U, 1U, 2U), 1, DataType::QS8, 2), // Mismatching fixed point
+                                                       TensorInfo(TensorShape(10U, 12U, 1U, 2U), 1, DataType::F32),    // Invalid output shape
+                                                       TensorInfo(TensorShape(10U, 12U, 1U, 2U), 1, DataType::F32),
+                                                     }),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(3U, 4U, 10U, 2U), 1, DataType::F16),
+                                                       TensorInfo(TensorShape(3U, 4U, 10U, 2U), 1, DataType::F16),
+                                                       TensorInfo(TensorShape(3U, 4U, 10U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(3U, 3U, 10U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(3U, 4U, 10U, 2U), 1, DataType::F32),
+                                                     })),
+               framework::dataset::make("ConvolvedWidth", { 3, 3, 3, 3, 3 })),
+               framework::dataset::make("ConvolvedHeight", { 4, 4, 4, 4, 4 })),
+               framework::dataset::make("Expected", { false, false, false, false, true })),
+               input_info, output_info, convolved_width, convolved_height, expected)
+{
+    bool err = bool(NECol2Im::validate(&input_info, &output_info, Size2D(convolved_width, convolved_height)));
+    ARM_COMPUTE_EXPECT(err == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp
index 944c506..1fd28df 100644
--- a/tests/validation/NEON/ConvolutionLayer.cpp
+++ b/tests/validation/NEON/ConvolutionLayer.cpp
@@ -23,6 +23,7 @@
  */
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEWinogradLayer.h"
 #include "arm_compute/runtime/Tensor.h"
 #include "arm_compute/runtime/TensorAllocator.h"
 #include "tests/NEON/Accessor.h"
@@ -34,6 +35,7 @@
 #include "tests/framework/datasets/Datasets.h"
 #include "tests/validation/Validation.h"
 #include "tests/validation/fixtures/ConvolutionLayerFixture.h"
+#include "tests/validation/fixtures/WinogradLayerFixture.h"
 
 namespace arm_compute
 {
@@ -44,17 +46,17 @@
 namespace
 {
 const AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 const AbsoluteTolerance<float> tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
-#endif                                               /* ARM_COMPUTE_ENABLE_FP16 */
+#endif                                               /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 const AbsoluteTolerance<float> tolerance_q(1.0f);    /**< Tolerance value for comparing reference's output against implementation's output for fixed point data types */
 
 /** CNN data types */
 const auto CNNDataTypes = framework::dataset::make("DataType",
 {
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
     DataType::F16,
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
     DataType::F32,
     DataType::QS8,
     DataType::QS16,
@@ -62,6 +64,23 @@
 } // namespace
 
 TEST_SUITE(NEON)
+
+#if defined(__aarch64__)
+TEST_SUITE(WinogradLayer)
+template <typename T>
+using NEWinogradLayerFixture = WinogradLayerValidationFixture<Tensor, Accessor, NEWinogradLayer, T>;
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradLayerFixture<float>, framework::DatasetMode::PRECOMMIT, datasets::SmallWinogradLayerDataset())
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+#endif /* __aarch64__ */
+
 TEST_SUITE(ConvolutionLayer)
 
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallConvolutionLayerDataset(), datasets::LargeConvolutionLayerDataset()), CNNDataTypes),
@@ -101,7 +120,7 @@
 using NEConvolutionLayerFixture = ConvolutionValidationFixture<Tensor, Accessor, NEConvolutionLayer, T>;
 
 TEST_SUITE(Float)
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallConvolutionLayerDataset(),
                                                                                                                      framework::dataset::make("ReshapeWeights", { true, false })),
@@ -118,7 +137,7 @@
     validate(Accessor(_target), _reference, tolerance_f16);
 }
 TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallConvolutionLayerDataset(),
diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp
new file mode 100644
index 0000000..751a965
--- /dev/null
+++ b/tests/validation/NEON/DeconvolutionLayer.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DeconvolutionLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */
+
+const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0,
+                     2)
+                     * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("ax", 1, 3) * framework::dataset::make("ay", 1, 3) * framework::dataset::make("NumKernels", { 1, 3 })
+                     *framework::dataset::make("ux", 1, 4) *framework::dataset::make("uy", 1, 4);
+
+const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0,
+                     1)
+                     * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("ax", 1, 3) * framework::dataset::make("ay", 1, 3) * framework::dataset::make("NumKernels", { 1, 3 })
+                     *framework::dataset::make("ux", 1, 4) *framework::dataset::make("uy", 1, 4);
+
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(DeconvolutionLayer)
+
+template <typename T>
+using NEDeconvolutionLayerFixture3x3 = DeconvolutionValidationFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 3, 3>;
+
+template <typename T>
+using NEDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 1, 1>;
+
+TEST_SUITE(Float)
+
+TEST_SUITE(FP32)
+TEST_SUITE(W3x3)
+
+FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(data3x3, framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(W1x1)
+FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture1x1<float>, framework::DatasetMode::ALL, combine(data1x1, framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/DepthConcatenateLayer.cpp b/tests/validation/NEON/DepthConcatenateLayer.cpp
index f7957a5..c79c7db 100644
--- a/tests/validation/NEON/DepthConcatenateLayer.cpp
+++ b/tests/validation/NEON/DepthConcatenateLayer.cpp
@@ -22,7 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/functions/NEDepthConcatenate.h"
+#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h"
 #include "arm_compute/runtime/Tensor.h"
 #include "arm_compute/runtime/TensorAllocator.h"
 #include "tests/NEON/Accessor.h"
@@ -43,10 +43,10 @@
 TEST_SUITE(DepthConcatenateLayer)
 
 template <typename T>
-using NEDepthConcatenateLayerFixture = DepthConcatenateValidationFixture<Tensor, ITensor, Accessor, NEDepthConcatenate, T>;
+using NEDepthConcatenateLayerFixture = DepthConcatenateLayerValidationFixture<Tensor, ITensor, Accessor, NEDepthConcatenateLayer, T>;
 
 TEST_SUITE(Float)
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
                                                                                                                   DataType::F16)))
@@ -54,14 +54,14 @@
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType",
                                                                                                                 DataType::F16)))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
 TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
@@ -70,7 +70,7 @@
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType",
                                                                                                                  DataType::F32)))
 {
     // Validate output
@@ -88,7 +88,7 @@
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(),
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(),
                                                                                                                   framework::dataset::make("DataType",
                                                                                                                           DataType::QS8)))
 {
@@ -105,7 +105,7 @@
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(),
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(),
                                                                                                                    framework::dataset::make("DataType",
                                                                                                                            DataType::QS16)))
 {
diff --git a/tests/validation/NEON/DepthConvert.cpp b/tests/validation/NEON/DepthConvert.cpp
deleted file mode 100644
index e036cc4..0000000
--- a/tests/validation/NEON/DepthConvert.cpp
+++ /dev/null
@@ -1,484 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/NEON/Accessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ConvertPolicyDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/DepthConvertFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/** Input data sets **/
-const auto DepthConvertU8toU16Dataset             = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16));
-const auto DepthConvertU8toS16Dataset             = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16));
-const auto DepthConvertU8toS32Dataset             = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32));
-const auto DepthConvertU16toU8Dataset             = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8));
-const auto DepthConvertU16toU32Dataset            = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32));
-const auto DepthConvertS16toU8Dataset             = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8));
-const auto DepthConvertS16toS32Dataset            = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32));
-const auto DepthConvertQS8toFP32Dataset           = combine(framework::dataset::make("DataType", DataType::QS8), framework::dataset::make("DataType", DataType::F32));
-const auto DepthConvertQS16toFP32Dataset          = combine(framework::dataset::make("DataType", DataType::QS16), framework::dataset::make("DataType", DataType::F32));
-const auto DepthConvertFP32toQS8Dataset           = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS8));
-const auto DepthConvertFP32toQS16Dataset          = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS16));
-const auto DepthConvertShiftDataset               = framework::dataset::make("Shift", 0, 7);
-const auto DepthConvertFixedPointQuantizedDataset = framework::dataset::make("FractionalBits", 1, 7);
-} // namespace
-
-TEST_SUITE(NEON)
-TEST_SUITE(DepthConvert)
-template <typename T>
-using NEDepthConvertToU16Fixture = DepthConvertValidationFixture<Tensor, Accessor, NEDepthConvert, T, uint16_t>;
-template <typename T>
-using NEDepthConvertToS16Fixture = DepthConvertValidationFixture<Tensor, Accessor, NEDepthConvert, T, int16_t>;
-template <typename T>
-using NEDepthConvertToS32Fixture = DepthConvertValidationFixture<Tensor, Accessor, NEDepthConvert, T, int32_t>;
-template <typename T>
-using NEDepthConvertToU8Fixture = DepthConvertValidationFixture<Tensor, Accessor, NEDepthConvert, T, uint8_t>;
-template <typename T>
-using NEDepthConvertToU32Fixture = DepthConvertValidationFixture<Tensor, Accessor, NEDepthConvert, T, uint32_t>;
-template <typename T>
-using NEDepthConvertToFP32FixedPointFixture = DepthConvertValidationFractionalBitsFixture<Tensor, Accessor, NEDepthConvert, T, float>;
-template <typename T>
-using NEDepthConvertToQS8FixedPointFixture = DepthConvertValidationFractionalBitsFixture<Tensor, Accessor, NEDepthConvert, T, int8_t>;
-template <typename T>
-using NEDepthConvertToQS16FixedPointFixture = DepthConvertValidationFractionalBitsFixture<Tensor, Accessor, NEDepthConvert, T, int16_t>;
-
-TEST_SUITE(U8_to_U16)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                   DepthConvertShiftDataset),
-               shape, policy, shift)
-{
-    int fixed_point_position = 0;
-
-    // Create tensors
-    Tensor src = create_tensor<Tensor>(shape, DataType::U8, 1, fixed_point_position);
-    Tensor dst = create_tensor<Tensor>(shape, DataType::U16, 1, fixed_point_position);
-
-    // Create and Configure function
-    NEDepthConvert depth_convert;
-    depth_convert.configure(&src, &dst, policy, shift);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertToU16Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU8toU16Dataset),
-                                                                                                                 framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                 DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertToU16Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU8toU16Dataset),
-                                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                               DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-TEST_SUITE_END()
-
-TEST_SUITE(U8_to_S16)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                   DepthConvertShiftDataset),
-               shape, policy, shift)
-{
-    int fixed_point_position = 0;
-
-    // Create tensors
-    Tensor src = create_tensor<Tensor>(shape, DataType::U8, 1, fixed_point_position);
-    Tensor dst = create_tensor<Tensor>(shape, DataType::S16, 1, fixed_point_position);
-
-    // Create and Configure function
-    NEDepthConvert depth_convert;
-    depth_convert.configure(&src, &dst, policy, shift);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertToS16Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU8toS16Dataset),
-                                                                                                                 framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                 DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertToS16Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU8toS16Dataset),
-                                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                               DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-TEST_SUITE_END()
-TEST_SUITE(U8_to_S32)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                   DepthConvertShiftDataset),
-               shape, policy, shift)
-{
-    int fixed_point_position = 0;
-
-    // Create tensors
-    Tensor src = create_tensor<Tensor>(shape, DataType::U8, 1, fixed_point_position);
-    Tensor dst = create_tensor<Tensor>(shape, DataType::S32, 1, fixed_point_position);
-
-    // Create and Configure function
-    NEDepthConvert depth_convert;
-    depth_convert.configure(&src, &dst, policy, shift);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertToS32Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU8toS32Dataset),
-                                                                                                                 framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                 DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertToS32Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU8toS32Dataset),
-                                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                               DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-TEST_SUITE_END()
-
-TEST_SUITE(U16_to_U8)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                   DepthConvertShiftDataset),
-               shape, policy, shift)
-{
-    int fixed_point_position = 0;
-
-    // Create tensors
-    Tensor src = create_tensor<Tensor>(shape, DataType::U16, 1, fixed_point_position);
-    Tensor dst = create_tensor<Tensor>(shape, DataType::U8, 1, fixed_point_position);
-
-    // Create and Configure function
-    NEDepthConvert depth_convert;
-    depth_convert.configure(&src, &dst, policy, shift);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertToU8Fixture<uint16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU16toU8Dataset),
-                                                                                                                 framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                 DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertToU8Fixture<uint16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU16toU8Dataset),
-                                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                               DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-TEST_SUITE_END()
-
-TEST_SUITE(U16_to_U32)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                   DepthConvertShiftDataset),
-               shape, policy, shift)
-{
-    int fixed_point_position = 0;
-
-    // Create tensors
-    Tensor src = create_tensor<Tensor>(shape, DataType::U16, 1, fixed_point_position);
-    Tensor dst = create_tensor<Tensor>(shape, DataType::U32, 1, fixed_point_position);
-
-    // Create and Configure function
-    NEDepthConvert depth_convert;
-    depth_convert.configure(&src, &dst, policy, shift);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertToU32Fixture<uint16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU16toU32Dataset),
-                                                                                                                  framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                  DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertToU32Fixture<uint16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU16toU32Dataset),
-                                                                                                                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-TEST_SUITE_END()
-
-TEST_SUITE(S16_to_U8)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                   DepthConvertShiftDataset),
-               shape, policy, shift)
-{
-    int fixed_point_position = 0;
-
-    // Create tensors
-    Tensor src = create_tensor<Tensor>(shape, DataType::S16, 1, fixed_point_position);
-    Tensor dst = create_tensor<Tensor>(shape, DataType::U8, 1, fixed_point_position);
-
-    // Create and Configure function
-    NEDepthConvert depth_convert;
-    depth_convert.configure(&src, &dst, policy, shift);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertToU8Fixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertS16toU8Dataset),
-                                                                                                                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertToU8Fixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertS16toU8Dataset),
-                                                                                                                      framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                              DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-TEST_SUITE_END()
-
-TEST_SUITE(S16_to_S32)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                   DepthConvertShiftDataset),
-               shape, policy, shift)
-{
-    int fixed_point_position = 0;
-
-    // Create tensors
-    Tensor src = create_tensor<Tensor>(shape, DataType::S16, 1, fixed_point_position);
-    Tensor dst = create_tensor<Tensor>(shape, DataType::S32, 1, fixed_point_position);
-
-    // Create and Configure function
-    NEDepthConvert depth_convert;
-    depth_convert.configure(&src, &dst, policy, shift);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertToS32Fixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertS16toS32Dataset),
-                                                                                                                 framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                 DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertToS32Fixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertS16toS32Dataset),
-                                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                               DepthConvertShiftDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-TEST_SUITE_END()
-
-TEST_SUITE(Quantized_to_FP32)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })),
-                                                                           framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                   DepthConvertFixedPointQuantizedDataset),
-               shape, dt, policy, fixed_point_position)
-{
-    int shift = 0;
-
-    // Create tensors
-    Tensor src = create_tensor<Tensor>(shape, dt, 1, fixed_point_position);
-    Tensor dst = create_tensor<Tensor>(shape, DataType::F32, 1, fixed_point_position);
-
-    // Create and Configure function
-    NEDepthConvert depth_convert;
-    depth_convert.configure(&src, &dst, policy, shift);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-FIXTURE_DATA_TEST_CASE(RunSmallQS8, NEDepthConvertToFP32FixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertQS8toFP32Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                       DepthConvertFixedPointQuantizedDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunSmallQS16, NEDepthConvertToFP32FixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertQS16toFP32Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                       DepthConvertFixedPointQuantizedDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLargeQS8, NEDepthConvertToFP32FixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertQS8toFP32Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                       DepthConvertFixedPointQuantizedDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLargeQS16, NEDepthConvertToFP32FixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertQS16toFP32Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                       DepthConvertFixedPointQuantizedDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-TEST_SUITE_END()
-
-TEST_SUITE(FP32_to_Quantized)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })),
-                                                                           framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                   DepthConvertFixedPointQuantizedDataset),
-               shape, dt, policy, fixed_point_position)
-{
-    int shift = 0;
-
-    // Create tensors
-    Tensor src = create_tensor<Tensor>(shape, DataType::F32, 1, fixed_point_position);
-    Tensor dst = create_tensor<Tensor>(shape, dt, 1, fixed_point_position);
-
-    // Create and Configure function
-    NEDepthConvert depth_convert;
-    depth_convert.configure(&src, &dst, policy, shift);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-FIXTURE_DATA_TEST_CASE(RunSmallQS8, NEDepthConvertToQS8FixedPointFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertFP32toQS8Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                       DepthConvertFixedPointQuantizedDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunSmallQS16, NEDepthConvertToQS16FixedPointFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertFP32toQS16Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                       DepthConvertFixedPointQuantizedDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLargeQS8, NEDepthConvertToQS8FixedPointFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertFP32toQS8Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                       DepthConvertFixedPointQuantizedDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLargeQS16, NEDepthConvertToQS16FixedPointFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertFP32toQS16Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                       DepthConvertFixedPointQuantizedDataset))
-{
-    // Validate output
-    validate(Accessor(_target), _reference);
-}
-TEST_SUITE_END()
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/NEON/DepthConvertLayer.cpp b/tests/validation/NEON/DepthConvertLayer.cpp
new file mode 100644
index 0000000..a56298b
--- /dev/null
+++ b/tests/validation/NEON/DepthConvertLayer.cpp
@@ -0,0 +1,492 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ConvertPolicyDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DepthConvertLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Input data sets **/
+const auto DepthConvertLayerU8toU16Dataset             = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16));
+const auto DepthConvertLayerU8toS16Dataset             = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16));
+const auto DepthConvertLayerU8toS32Dataset             = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32));
+const auto DepthConvertLayerU16toU8Dataset             = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8));
+const auto DepthConvertLayerU16toU32Dataset            = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32));
+const auto DepthConvertLayerS16toU8Dataset             = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8));
+const auto DepthConvertLayerS16toS32Dataset            = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32));
+const auto DepthConvertLayerQS8toFP32Dataset           = combine(framework::dataset::make("DataType", DataType::QS8), framework::dataset::make("DataType", DataType::F32));
+const auto DepthConvertLayerQS16toFP32Dataset          = combine(framework::dataset::make("DataType", DataType::QS16), framework::dataset::make("DataType", DataType::F32));
+const auto DepthConvertLayerFP32toQS8Dataset           = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS8));
+const auto DepthConvertLayerFP32toQS16Dataset          = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS16));
+const auto DepthConvertLayerShiftDataset               = framework::dataset::make("Shift", 0, 7);
+const auto DepthConvertLayerFixedPointQuantizedDataset = framework::dataset::make("FractionalBits", 1, 7);
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(DepthConvertLayer)
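+// Fixture aliases: T is the input element type; the trailing template argument selects the output element type checked by the validation fixture.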
+template <typename T>
+using NEDepthConvertLayerToU16Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, uint16_t>;
+template <typename T>
+using NEDepthConvertLayerToS16Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, int16_t>;
+template <typename T>
+using NEDepthConvertLayerToS32Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, int32_t>;
+template <typename T>
+using NEDepthConvertLayerToU8Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, uint8_t>;
+template <typename T>
+using NEDepthConvertLayerToU32Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, uint32_t>;
+template <typename T>
+using NEDepthConvertLayerToFP32FixedPointFixture = DepthConvertLayerValidationFractionalBitsFixture<Tensor, Accessor, NEDepthConvertLayer, T, float>;
+template <typename T>
+using NEDepthConvertLayerToQS8FixedPointFixture = DepthConvertLayerValidationFractionalBitsFixture<Tensor, Accessor, NEDepthConvertLayer, T, int8_t>;
+template <typename T>
+using NEDepthConvertLayerToQS16FixedPointFixture = DepthConvertLayerValidationFractionalBitsFixture<Tensor, Accessor, NEDepthConvertLayer, T, int16_t>;
+
+TEST_SUITE(U8_to_U16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, DataType::U8, 1, fixed_point_position);
+    Tensor dst = create_tensor<Tensor>(shape, DataType::U16, 1, fixed_point_position);
+
+    // Create and Configure function
+    NEDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU16Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toU16Dataset),
+                                                                                                                      framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                      DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU16Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toU16Dataset),
+                                                                                                                    framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                    DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(U8_to_S16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, DataType::U8, 1, fixed_point_position);
+    Tensor dst = create_tensor<Tensor>(shape, DataType::S16, 1, fixed_point_position);
+
+    // Create and Configure function
+    NEDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToS16Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toS16Dataset),
+                                                                                                                      framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                      DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS16Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toS16Dataset),
+                                                                                                                    framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                    DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+TEST_SUITE(U8_to_S32)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, DataType::U8, 1, fixed_point_position);
+    Tensor dst = create_tensor<Tensor>(shape, DataType::S32, 1, fixed_point_position);
+
+    // Create and Configure function
+    NEDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToS32Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toS32Dataset),
+                                                                                                                      framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                      DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS32Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toS32Dataset),
+                                                                                                                    framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                    DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(U16_to_U8)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, DataType::U16, 1, fixed_point_position);
+    Tensor dst = create_tensor<Tensor>(shape, DataType::U8, 1, fixed_point_position);
+
+    // Create and Configure function
+    NEDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU8Fixture<uint16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU16toU8Dataset),
+                                                                                                                      framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                      DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU8Fixture<uint16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU16toU8Dataset),
+                                                                                                                    framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                    DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(U16_to_U32)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, DataType::U16, 1, fixed_point_position);
+    Tensor dst = create_tensor<Tensor>(shape, DataType::U32, 1, fixed_point_position);
+
+    // Create and Configure function
+    NEDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU32Fixture<uint16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU16toU32Dataset),
+                                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU32Fixture<uint16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU16toU32Dataset),
+                                                                                                                     framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                     DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(S16_to_U8)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, DataType::S16, 1, fixed_point_position);
+    Tensor dst = create_tensor<Tensor>(shape, DataType::U8, 1, fixed_point_position);
+
+    // Create and Configure function
+    NEDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU8Fixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerS16toU8Dataset),
+                                                                                                                     framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                     DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU8Fixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS16toU8Dataset),
+                                                                                                                   framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                   DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(S16_to_S32)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, DataType::S16, 1, fixed_point_position);
+    Tensor dst = create_tensor<Tensor>(shape, DataType::S32, 1, fixed_point_position);
+
+    // Create and Configure function
+    NEDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToS32Fixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerS16toS32Dataset),
+                                                                                                                      framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                      DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS32Fixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS16toS32Dataset),
+                                                                                                                    framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                    DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(Quantized_to_FP32)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })),
+                                                                           framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerFixedPointQuantizedDataset),
+               shape, dt, policy, fixed_point_position)
+{
+    int shift = 0;
+
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, dt, 1, fixed_point_position);
+    Tensor dst = create_tensor<Tensor>(shape, DataType::F32, 1, fixed_point_position);
+
+    // Create and Configure function
+    NEDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallQS8, NEDepthConvertLayerToFP32FixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+                       DepthConvertLayerQS8toFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallQS16, NEDepthConvertLayerToFP32FixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+                       DepthConvertLayerQS16toFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeQS8, NEDepthConvertLayerToFP32FixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+                       DepthConvertLayerQS8toFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeQS16, NEDepthConvertLayerToFP32FixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+                       DepthConvertLayerQS16toFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(FP32_to_Quantized)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })),
+                                                                           framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerFixedPointQuantizedDataset),
+               shape, dt, policy, fixed_point_position)
+{
+    int shift = 0;
+
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, DataType::F32, 1, fixed_point_position);
+    Tensor dst = create_tensor<Tensor>(shape, dt, 1, fixed_point_position);
+
+    // Create and Configure function
+    NEDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallQS8, NEDepthConvertLayerToQS8FixedPointFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+                       DepthConvertLayerFP32toQS8Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallQS16, NEDepthConvertLayerToQS16FixedPointFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+                       DepthConvertLayerFP32toQS16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeQS8, NEDepthConvertLayerToQS8FixedPointFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+                       DepthConvertLayerFP32toQS8Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeQS16, NEDepthConvertLayerToQS16FixedPointFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+                       DepthConvertLayerFP32toQS16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
new file mode 100644
index 0000000..17eaaf8
--- /dev/null
+++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/DepthwiseConvolutionLayerDataset.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr RelativeTolerance<float> tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(DepthwiseConvolutionLayer)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                                                                              datasets::LargeDepthwiseConvolutionLayerDataset3x3()),
+                                                                   framework::dataset::make("DataType", DataType::F32)),
+               input_shape, weights_shape, bias_shape, output_shape, info, data_type)
+{
+    // Create tensors
+    Tensor src     = create_tensor<Tensor>(input_shape, data_type);
+    Tensor dst     = create_tensor<Tensor>(output_shape, data_type);
+    Tensor weights = create_tensor<Tensor>(weights_shape, data_type);
+    Tensor bias    = create_tensor<Tensor>(bias_shape, data_type);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    NEDepthwiseConvolutionLayer3x3 depthwise_layer;
+    depthwise_layer.configure(&src, &weights, &bias, &dst, info);
+
+    // Validate valid region
+    const ValidRegion input_valid_region   = shape_to_valid_region(input_shape);
+    const ValidRegion output_valid_region  = shape_to_valid_region(output_shape);
+    const ValidRegion weights_valid_region = shape_to_valid_region(weights_shape);
+    const ValidRegion bias_valid_region    = shape_to_valid_region(bias_shape);
+
+    validate(src.info()->valid_region(), input_valid_region);
+    validate(dst.info()->valid_region(), output_valid_region);
+    validate(weights.info()->valid_region(), weights_valid_region);
+    validate(bias.info()->valid_region(), bias_valid_region);
+
+    // Validate padding
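+    // The required padding depends on how many output elements are processed per iteration (16 >> stride), so the step shrinks as the stride grows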
+    const int         step    = 16 >> info.stride().first;
+    const PaddingSize padding = PaddingCalculator(output_shape.x(), step).required_padding();
+    validate(dst.info()->padding(), padding);
+}
+
+TEST_SUITE(Float)
+TEST_SUITE(F32)
+TEST_SUITE(Generic)
+template <typename T>
+using NEDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer, T>;
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+                                                                                                                       framework::dataset::make("DataType",
+                                                                                                                               DataType::F32)))
+{
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+                                                                                                                     framework::dataset::make("DataType",
+                                                                                                                             DataType::F32)))
+{
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(W3x3)
+template <typename T>
+using NEDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer3x3, T>;
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                                                                                                    framework::dataset::make("DataType",
+                                                                                                                            DataType::F32)))
+{
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+                                                                                                                        framework::dataset::make("DataType",
+                                                                                                                                DataType::F32)))
+{
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/DepthwiseSeparableConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseSeparableConvolutionLayer.cpp
new file mode 100644
index 0000000..c3e6705
--- /dev/null
+++ b/tests/validation/NEON/DepthwiseSeparableConvolutionLayer.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/MobileNetDepthwiseSeparableConvolutionLayerDataset.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DepthwiseSeparableConvolutionLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+RelativeTolerance<float> tolerance_f32(0.1f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
+const float              tolerance_num = 0.001f; /**< Tolerance number: maximum allowed ratio of mismatching elements */
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(DepthwiseSeparableConvolutionLayer)
+
+// TODO: Add configuration test
+
+template <typename T>
+using NEDepthwiseSeparableConvolutionLayerFixture = DepthwiseSeparableConvolutionValidationFixture<Tensor, Accessor, NEDepthwiseSeparableConvolutionLayer, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseSeparableConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, datasets::MobileNetDepthwiseSeparableConvolutionLayerDataset())
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_f32, tolerance_num);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/Dilate.cpp b/tests/validation/NEON/Dilate.cpp
new file mode 100644
index 0000000..af1af8e
--- /dev/null
+++ b/tests/validation/NEON/Dilate.cpp
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEDilate.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DilateFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr unsigned int filter_size = 3;              /* Size of the kernel/filter in number of elements. */
+constexpr BorderSize   border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(Dilate)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::U8)),
+                                                                   datasets::BorderModes()),
+               shape, data_type, border_mode)
+{
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, data_type);
+    Tensor dst = create_tensor<Tensor>(shape, data_type);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    NEDilate dilate;
+    dilate.configure(&src, &dst, border_mode);
+
+    // Validate valid region
+    const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), border_size);
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding
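+    // The 3x3 NEON dilate kernel is assumed to write 8 output pixels per iteration while reading
+    // a 16-pixel window starting one element to the left, hence the separate destination and
+    // source padding requirements computed below.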
+    PaddingCalculator calculator(shape.x(), 8);
+    calculator.set_border_size(1);
+    calculator.set_border_mode(border_mode);
+
+    const PaddingSize dst_padding = calculator.required_padding();
+
+    calculator.set_accessed_elements(16);
+    calculator.set_access_offset(-1);
+
+    const PaddingSize src_padding = calculator.required_padding();
+
+    validate(src.info()->padding(), src_padding);
+    validate(dst.info()->padding(), dst_padding);
+}
+
+template <typename T>
+using NEDilateFixture = DilateValidationFixture<Tensor, Accessor, NEDilate, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDilateFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                              DataType::U8)),
+                                                                                                      datasets::BorderModes()))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDilateFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                            DataType::U8)),
+                                                                                                    datasets::BorderModes()))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/DirectConvolutionLayer.cpp b/tests/validation/NEON/DirectConvolutionLayer.cpp
index 314041b..9982b49 100644
--- a/tests/validation/NEON/DirectConvolutionLayer.cpp
+++ b/tests/validation/NEON/DirectConvolutionLayer.cpp
@@ -43,9 +43,9 @@
 namespace
 {
 constexpr AbsoluteTolerance<float> tolerance_qs(1.f); /**< Tolerance for fixed point tests */
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 constexpr AbsoluteTolerance<float> tolerance_fp16(0.01f);  /**< Tolerance for half precision floating point tests */
-#endif                                                     /* ARM_COMPUTE_ENABLE_FP16 */
+#endif                                                     /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */
 
 /** Direct convolution data set. */
@@ -91,11 +91,73 @@
 TEST_SUITE(NEON)
 TEST_SUITE(DirectConvolutionLayer)
 
+// *INDENT-OFF*
+// clang-format off
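+// Each zip() below pairs the i-th entry of every dataset, so each row describes one invalid
+// configuration (see the inline comments on InputInfo) that validate() is expected to reject.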
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
+        framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching data type input/weights
+                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching input feature maps
+                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Unsupported kernel width
+                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Non-rectangular weights dimensions
+                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid weights dimensions
+                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid stride
+                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid biases size
+                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid biases dimensions
+                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid output size
+                                              }),
+        framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F16, 0),
+                                                 TensorInfo(TensorShape(3U, 3U, 3U, 4U), 1, DataType::F32, 0),
+                                                 TensorInfo(TensorShape(9U, 9U, 2U, 4U), 1, DataType::F32, 0),
+                                                 TensorInfo(TensorShape(5U, 3U, 2U, 4U), 1, DataType::F32, 0),
+                                                 TensorInfo(TensorShape(3U, 3U, 2U, 4U, 3U), 1, DataType::F32, 0),
+                                                 TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
+                                                 TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
+                                                 TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
+                                                 TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
+                                              })),
+        framework::dataset::make("BiasesInfo",{ TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(3U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(4U, 2U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                              })),
+        framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, 0),
+                                                TensorInfo(TensorShape(26U, 11U, 4U), 1, DataType::F32, 0),
+                                              })),
+        framework::dataset::make("ConvInfo",  { PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(3, 3, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                               })),
+        framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false })),
+        input_info, weights_info, biases_info, output_info, conv_info, expected)
+{
+        bool is_valid = bool(NEDirectConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info));
+        ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
 template <typename T>
 using NEDirectConvolutionLayerFixture = DirectConvolutionValidationFixture<Tensor, Accessor, NEDirectConvolutionLayer, T>;
 
 TEST_SUITE(Float)
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(Run, NEDirectConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(data_f32, framework::dataset::make("DataType", DataType::F16)))
 {
@@ -103,7 +165,7 @@
     validate(Accessor(_target), _reference, tolerance_fp16);
 }
 TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE(Run, NEDirectConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(data_f32, framework::dataset::make("DataType", DataType::F32)))
diff --git a/tests/validation/NEON/Erode.cpp b/tests/validation/NEON/Erode.cpp
new file mode 100644
index 0000000..d1763a2
--- /dev/null
+++ b/tests/validation/NEON/Erode.cpp
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEErode.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/ErodeFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr unsigned int filter_size = 3;              /* Size of the kernel/filter in number of elements. */
+constexpr BorderSize   border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(Erode)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::U8)),
+                                                                   datasets::BorderModes()),
+               shape, data_type, border_mode)
+{
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, data_type);
+    Tensor dst = create_tensor<Tensor>(shape, data_type);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    NEErode erode;
+    erode.configure(&src, &dst, border_mode);
+
+    // Validate valid region
+    const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), border_size);
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding
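+    // As with the other 3x3 filters, an 8-pixel write step and a 16-pixel read window at
+    // offset -1 are assumed here when deriving the source/destination padding.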
+    PaddingCalculator calculator(shape.x(), 8);
+    calculator.set_border_size(1);
+    calculator.set_border_mode(border_mode);
+
+    const PaddingSize dst_padding = calculator.required_padding();
+
+    calculator.set_accessed_elements(16);
+    calculator.set_access_offset(-1);
+
+    const PaddingSize src_padding = calculator.required_padding();
+
+    validate(src.info()->padding(), src_padding);
+    validate(dst.info()->padding(), dst_padding);
+}
+
+template <typename T>
+using NEErodeFixture = ErodeValidationFixture<Tensor, Accessor, NEErode, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEErodeFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                             DataType::U8)),
+                                                                                                     datasets::BorderModes()))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEErodeFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                           DataType::U8)),
+                                                                                                   datasets::BorderModes()))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/FixedPoint/FixedPoint.cpp b/tests/validation/NEON/FixedPoint/FixedPoint.cpp
index 3b25b05..871143b 100644
--- a/tests/validation/NEON/FixedPoint/FixedPoint.cpp
+++ b/tests/validation/NEON/FixedPoint/FixedPoint.cpp
@@ -61,7 +61,7 @@
 TEST_SUITE(QS8)
 TEST_SUITE(Exp)
 
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small1DShape(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small1DShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::QS8)),
                                                                                                            framework::dataset::make("FixedPointOp", FixedPointOp::EXP)),
                                                                                                    framework::dataset::make("FractionalBits", 1, 7)))
@@ -72,7 +72,7 @@
 TEST_SUITE_END()
 
 TEST_SUITE(Invsqrt)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small1DShape(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small1DShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::QS8)),
                                                                                                            framework::dataset::make("FixedPointOp", FixedPointOp::INV_SQRT)),
                                                                                                    framework::dataset::make("FractionalBits", 1, 6)))
@@ -83,7 +83,7 @@
 TEST_SUITE_END()
 
 TEST_SUITE(Log)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small1DShape(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small1DShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::QS8)),
                                                                                                            framework::dataset::make("FixedPointOp", FixedPointOp::LOG)),
                                                                                                    framework::dataset::make("FractionalBits", 3, 6)))
@@ -94,7 +94,7 @@
 TEST_SUITE_END()
 
 TEST_SUITE(Reciprocal)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small1DShape(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small1DShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::QS8)),
                                                                                                            framework::dataset::make("FixedPointOp", FixedPointOp::RECIPROCAL)),
                                                                                                    framework::dataset::make("FractionalBits", 1, 6)))
@@ -107,7 +107,7 @@
 
 TEST_SUITE(QS16)
 TEST_SUITE(Exp)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFixedPointFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small1DShape(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFixedPointFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small1DShapes(), framework::dataset::make("DataType",
                                                                                                                     DataType::QS16)),
                                                                                                             framework::dataset::make("FixedPointOp", FixedPointOp::EXP)),
                                                                                                     framework::dataset::make("FractionalBits", 1, 15)))
@@ -130,7 +130,7 @@
 TEST_SUITE_END()
 
 TEST_SUITE(Log)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFixedPointFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small1DShape(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFixedPointFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small1DShapes(), framework::dataset::make("DataType",
                                                                                                                     DataType::QS16)),
                                                                                                             framework::dataset::make("FixedPointOp", FixedPointOp::LOG)),
                                                                                                     framework::dataset::make("FractionalBits", 4, 14)))
@@ -141,7 +141,7 @@
 TEST_SUITE_END()
 
 TEST_SUITE(Reciprocal)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEFixedPointFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small1DShape(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunSmall, NEFixedPointFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::Small1DShapes(), framework::dataset::make("DataType",
                                                                                                                     DataType::QS16)),
                                                                                                             framework::dataset::make("FixedPointOp", FixedPointOp::RECIPROCAL)),
                                                                                                     framework::dataset::make("FractionalBits", 1, 14)))
diff --git a/tests/validation/NEON/FixedPoint/FixedPointTarget.h b/tests/validation/NEON/FixedPoint/FixedPointTarget.h
index fa1d97f..31ccc08 100644
--- a/tests/validation/NEON/FixedPoint/FixedPointTarget.h
+++ b/tests/validation/NEON/FixedPoint/FixedPointTarget.h
@@ -50,6 +50,7 @@
             window                                                   = calculate_max_window(*src.info(), Steps(num_elems_processed_per_iteration));
             AccessWindowHorizontal input_access(src.info(), 0, num_elems_processed_per_iteration);
             AccessWindowHorizontal output_access(dst.info(), 0, num_elems_processed_per_iteration);
+            update_window_and_padding(window, input_access, output_access);
             break;
         }
         case DataType::QS16:
@@ -58,6 +59,7 @@
             window                                                   = calculate_max_window(*src.info(), Steps(num_elems_processed_per_iteration));
             AccessWindowHorizontal input_access(src.info(), 0, num_elems_processed_per_iteration);
             AccessWindowHorizontal output_access(dst.info(), 0, num_elems_processed_per_iteration);
+            update_window_and_padding(window, input_access, output_access);
             break;
         }
         default:
diff --git a/tests/validation/NEON/FixedPointPixelWiseMultiplication.cpp b/tests/validation/NEON/FixedPointPixelWiseMultiplication.cpp
new file mode 100644
index 0000000..f0a499d
--- /dev/null
+++ b/tests/validation/NEON/FixedPointPixelWiseMultiplication.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/datasets/ConvertPolicyDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/FixedPointPixelWiseMultiplicationFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+const float tolerance   = 1.f;
+const float scale_255   = 1.f / 255.f;
+const float scale_unity = 1.f;
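+// scale_255 and scale_unity name the two scale factors exercised by the Scale255 and ScaleUnity
+// suites below; the remaining fixed-point scales go through the ScaleOther cases, where a
+// one-unit absolute tolerance (presumably one fixed-point LSB) is accepted.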
+
+// *INDENT-OFF*
+// clang-format off
+#define FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(TEST_NAME, FIXTURE, MODE, SHAPES, DT1, DT2, SCALE, RP, FPP_START, FPP_END) \
+    FIXTURE_DATA_TEST_CASE(TEST_NAME, NEFixedPointPixelWiseMultiplication##FIXTURE, framework::DatasetMode::MODE,                      \
+                           combine(combine(combine(combine(combine(combine(                                                            \
+                           datasets::SHAPES,                                                                                           \
+                           framework::dataset::make("DataType1", DataType::DT1)),                                                      \
+                           framework::dataset::make("DataType2", DataType::DT2)),                                                      \
+                           framework::dataset::make("Scale", std::move(SCALE))),                                                       \
+                           datasets::ConvertPolicies()),                                                                               \
+                           framework::dataset::make("RoundingPolicy", RoundingPolicy::RP)),                                            \
+                           framework::dataset::make("FixedPointPosition", FPP_START, FPP_END)))                                        \
+    {                                                                                                                                  \
+        validate(Accessor(_target), _reference);                                                                                       \
+    }
+
+#define FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(TEST_NAME, FIXTURE, MODE, SHAPES, DT1, DT2, RP, FPP, TOLERANCE)  \
+    FIXTURE_DATA_TEST_CASE(TEST_NAME, NEFixedPointPixelWiseMultiplication##FIXTURE, framework::DatasetMode::MODE,                  \
+                           combine(combine(combine(combine(combine(combine(                                                        \
+                           datasets::SHAPES,                                                                                       \
+                           framework::dataset::make("DataType1", DataType::DT1)),                                                  \
+                           framework::dataset::make("DataType2", DataType::DT2)),                                                  \
+                           framework::dataset::make("Scale", 1.f / static_cast<float>(1 << (FPP)))),                               \
+                           datasets::ConvertPolicies()),                                                                           \
+                           framework::dataset::make("RoundingPolicy", RoundingPolicy::RP)),                                        \
+                           framework::dataset::make("FixedPointPosition", FPP)))                                                   \
+    {                                                                                                                              \
+        validate(Accessor(_target), _reference, AbsoluteTolerance<float>(TOLERANCE), 0.f);                                         \
+    }
+// clang-format on
+// *INDENT-ON*
+} // namespace
+
+template <typename T>
+using NEFixedPointPixelWiseMultiplicationFixture = FixedPointPixelWiseMultiplicationValidationFixture<Tensor, Accessor, NEPixelWiseMultiplication, T>;
+
+TEST_SUITE(NEON)
+TEST_SUITE(FixedPointPixelWiseMultiplication)
+
+TEST_SUITE(QS8)
+
+TEST_SUITE(Scale255)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, Fixture<qint8_t>, PRECOMMIT, SmallShapes(), QS8, QS8, scale_255, TO_NEAREST_UP, 1, 7)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunLarge, Fixture<qint8_t>, NIGHTLY, LargeShapes(), QS8, QS8, scale_255, TO_NEAREST_UP, 1, 7)
+TEST_SUITE_END() // Scale255
+
+TEST_SUITE(ScaleUnity)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, Fixture<qint8_t>, PRECOMMIT, SmallShapes(), QS8, QS8, scale_unity, TO_ZERO, 1, 7)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunLarge, Fixture<qint8_t>, NIGHTLY, LargeShapes(), QS8, QS8, scale_unity, TO_ZERO, 1, 7)
+TEST_SUITE_END() // ScaleUnity
+
+TEST_SUITE(ScaleOther)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther1, Fixture<qint8_t>, PRECOMMIT, SmallShapes(), QS8, QS8, TO_ZERO, 1, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther2, Fixture<qint8_t>, PRECOMMIT, SmallShapes(), QS8, QS8, TO_ZERO, 2, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther3, Fixture<qint8_t>, PRECOMMIT, SmallShapes(), QS8, QS8, TO_ZERO, 3, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther4, Fixture<qint8_t>, PRECOMMIT, SmallShapes(), QS8, QS8, TO_ZERO, 4, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther5, Fixture<qint8_t>, PRECOMMIT, SmallShapes(), QS8, QS8, TO_ZERO, 5, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther6, Fixture<qint8_t>, PRECOMMIT, SmallShapes(), QS8, QS8, TO_ZERO, 6, tolerance)
+
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunLargeOther1, Fixture<qint8_t>, NIGHTLY, LargeShapes(), QS8, QS8, TO_ZERO, 1, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunLargeOther2, Fixture<qint8_t>, NIGHTLY, LargeShapes(), QS8, QS8, TO_ZERO, 2, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunLargeOther3, Fixture<qint8_t>, NIGHTLY, LargeShapes(), QS8, QS8, TO_ZERO, 3, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunLargeOther4, Fixture<qint8_t>, NIGHTLY, LargeShapes(), QS8, QS8, TO_ZERO, 4, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunLargeOther5, Fixture<qint8_t>, NIGHTLY, LargeShapes(), QS8, QS8, TO_ZERO, 5, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunLargeOther6, Fixture<qint8_t>, NIGHTLY, LargeShapes(), QS8, QS8, TO_ZERO, 6, tolerance)
+TEST_SUITE_END() // ScaleOther
+
+TEST_SUITE_END() // QS8
+
+TEST_SUITE(QS16)
+
+TEST_SUITE(Scale255)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, scale_255, TO_NEAREST_UP, 1, 15)
+TEST_SUITE_END() // Scale255
+
+TEST_SUITE(ScaleUnity)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, scale_unity, TO_ZERO, 1, 15)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunLarge, Fixture<qint16_t>, NIGHTLY, LargeShapes(), QS16, QS16, scale_unity, TO_ZERO, 1, 15)
+TEST_SUITE_END() // ScaleUnity
+
+TEST_SUITE(ScaleOther)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther1, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 1, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther2, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 2, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther3, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 3, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther4, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 4, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther5, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 5, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther6, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 6, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther7, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 7, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther8, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 8, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther9, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 9, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther10, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 10, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther11, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 11, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther12, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 12, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther13, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 13, tolerance)
+FP_PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE_OTHER(RunSmallOther14, Fixture<qint16_t>, PRECOMMIT, SmallShapes(), QS16, QS16, TO_ZERO, 14, tolerance)
+TEST_SUITE_END() // ScaleOther
+
+TEST_SUITE_END() // QS16
+
+TEST_SUITE_END() // FixedPointPixelWiseMultiplication
+TEST_SUITE_END() // NEON
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/Flatten.cpp b/tests/validation/NEON/Flatten.cpp
index f8d8301..d80faf4 100644
--- a/tests/validation/NEON/Flatten.cpp
+++ b/tests/validation/NEON/Flatten.cpp
@@ -62,7 +62,7 @@
 }
 TEST_SUITE_END()
 
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEFlattenLayerFixture<half>, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::Small3DShapes(), datasets::Small4DShapes()),
                                                                                                    framework::dataset::make("DataType", DataType::F16)))
@@ -77,7 +77,7 @@
     validate(Accessor(_target), _reference);
 }
 TEST_SUITE_END()
-#endif // ARM_COMPUTE_ENABLE_FP16
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 TEST_SUITE_END()
 
 TEST_SUITE(Quantized)
diff --git a/tests/validation/NEON/FullyConnectedLayer.cpp b/tests/validation/NEON/FullyConnectedLayer.cpp
index b5ae48a..afdcc05 100644
--- a/tests/validation/NEON/FullyConnectedLayer.cpp
+++ b/tests/validation/NEON/FullyConnectedLayer.cpp
@@ -44,18 +44,18 @@
 {
 /** Tolerance for float operations */
 constexpr RelativeTolerance<float> tolerance_f32(0.01f);
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 constexpr RelativeTolerance<float> tolerance_f16(0.01f);
-#endif /* ARM_COMPUTE_ENABLE_FP16*/
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 /** Tolerance for fixed point operations */
 constexpr AbsoluteTolerance<float> tolerance_fixed_point(1.f);
 
 /** CNN data types */
 const auto CNNDataTypes = framework::dataset::make("DataType",
 {
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
     DataType::F16,
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
     DataType::F32,
     DataType::QS8,
     DataType::QS16,
@@ -119,7 +119,7 @@
 using NEFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<Tensor, Accessor, NEFullyConnectedLayer, T, true>;
 
 TEST_SUITE(Float)
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(),
                                                                                                                         FullyConnectedParameters),
@@ -136,7 +136,7 @@
     validate(Accessor(_target), _reference, tolerance_f16);
 }
 TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters),
@@ -157,7 +157,7 @@
 template <typename T>
 using NEFullyConnectedLayerFixedPointFixture = FullyConnectedLayerValidationFixedPointFixture<Tensor, Accessor, NEFullyConnectedLayer, T, true>;
 
-TEST_SUITE(Quantized)
+TEST_SUITE(FixedPoint)
 TEST_SUITE(QS8)
 // Testing for fixed point position [1,6) as reciprocal limits the maximum fixed point position to 5
 FIXTURE_DATA_TEST_CASE(RunSmall, NEFullyConnectedLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(),
diff --git a/tests/validation/NEON/GEMM.cpp b/tests/validation/NEON/GEMM.cpp
index 7844e51..811a6f0 100644
--- a/tests/validation/NEON/GEMM.cpp
+++ b/tests/validation/NEON/GEMM.cpp
@@ -21,11 +21,13 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
+#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/NEON/functions/NEGEMM.h"
 #include "arm_compute/runtime/Tensor.h"
 #include "arm_compute/runtime/TensorAllocator.h"
 #include "tests/NEON/Accessor.h"
+#include "tests/NEON/Helper.h"
 #include "tests/PaddingCalculator.h"
 #include "tests/datasets/LargeGEMMDataset.h"
 #include "tests/datasets/SmallGEMMDataset.h"
@@ -34,6 +36,7 @@
 #include "tests/framework/datasets/Datasets.h"
 #include "tests/validation/Validation.h"
 #include "tests/validation/fixtures/GEMMFixture.h"
+#include "tests/validation/fixtures/GEMMInterleave4x4Fixture.h"
 
 namespace arm_compute
 {
@@ -49,18 +52,59 @@
 /** CNN data types */
 const auto CNNDataTypes = framework::dataset::make("DataType",
 {
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
     DataType::F16,
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
     DataType::F32,
     DataType::QS8,
     DataType::QS16,
 });
+
+const auto data_interleave = framework::dataset::make("M", 8, 12) * framework::dataset::make("N", 8, 12);
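+// The '*' operator forms the cartesian product of the two ranges, so every (M, N) combination
+// above is exercised by the interleave fixtures below.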
 } // namespace
 
 TEST_SUITE(NEON)
 TEST_SUITE(GEMM)
 
+TEST_SUITE(INTERLEAVE_4X4)
+using NEGEMMInterleave4x4 = NESynthetizeFunctionWithZeroConstantBorder<NEGEMMInterleave4x4Kernel, 4>;
+
+TEST_SUITE(FP32)
+using NEGEMMInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<Tensor, Accessor, NEGEMMInterleave4x4, float>;
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * framework::dataset::make("DataType", DataType::F32))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QS8)
+using NEGEMMInterleave4x4Fixture = GEMMInterleave4x4ValidationFixedPointFixture<Tensor, Accessor, NEGEMMInterleave4x4, int8_t>;
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave *
+                       framework::dataset::make("DataType", DataType::QS8)
+                       * framework::dataset::make("FractionalBits", 1, 7))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // QS8
+
+TEST_SUITE(QS16)
+using NEGEMMInterleave4x4Fixture = GEMMInterleave4x4ValidationFixedPointFixture<Tensor, Accessor, NEGEMMInterleave4x4, int16_t>;
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave *
+                       framework::dataset::make("DataType", DataType::QS16)
+                       * framework::dataset::make("FractionalBits", 1, 14))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // QS16
+
+TEST_SUITE_END() // Quantized
+
+TEST_SUITE_END() // INTERLEAVE_4X4
+
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallGEMMDataset(), datasets::LargeGEMMDataset()), CNNDataTypes),
                shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type)
 {
@@ -87,7 +131,7 @@
 using NEGEMMFixture = GEMMValidationFixture<Tensor, Accessor, NEGEMM, T>;
 
 TEST_SUITE(Float)
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallGEMMDataset(), framework::dataset::make("DataType", DataType::F16)))
 {
@@ -101,7 +145,7 @@
     validate(Accessor(_target), _reference, tolerance_f);
 }
 TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallGEMMDataset(), framework::dataset::make("DataType", DataType::F32)))
diff --git a/tests/validation/NEON/GEMMLowp.cpp b/tests/validation/NEON/GEMMLowp.cpp
index 3d83f80..a901b44 100644
--- a/tests/validation/NEON/GEMMLowp.cpp
+++ b/tests/validation/NEON/GEMMLowp.cpp
@@ -21,15 +21,26 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
+#include "arm_compute/core/NEON/kernels/NEGEMMInterleaveBlockedKernel.h"
+#include "arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64Kernel.h"
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMLowp.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
 #include "arm_compute/runtime/Tensor.h"
 #include "arm_compute/runtime/TensorAllocator.h"
 #include "tests/NEON/Accessor.h"
+#include "tests/NEON/Helper.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/LargeGEMMLowpDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/datasets/SmallGEMMLowpDataset.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
 #include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/GEMMInterleaveBlockedFixture.h"
+#include "tests/validation/fixtures/GEMMLowpAssemblyFixture.h"
 #include "tests/validation/fixtures/GEMMLowpFixture.h"
 
 namespace arm_compute
@@ -40,29 +51,375 @@
 {
 namespace
 {
-constexpr AbsoluteTolerance<float> tolerance_f(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
-
-const auto data_mnk     = framework::dataset::make("M", 8, 12) * framework::dataset::make("N", 8, 12) * framework::dataset::make("K", 8, 12);
-const auto data_offsets = framework::dataset::make("a", -3, 3) * framework::dataset::make("b", -1, 2) * framework::dataset::make("c", 1, 3) * framework::dataset::make("cm", 0,
-                          3)
-                          * framework::dataset::make("shift", 0, 4);
-
+const auto data_int_blk         = framework::dataset::make("M", 8, 12) * framework::dataset::make("N", 8, 12) * framework::dataset::make("by", 8, 13) * framework::dataset::make("block", 4, 9);
+const auto data_int_blk_tr      = framework::dataset::make("M", 8, 17) * framework::dataset::make("N", 8, 14) * framework::dataset::make("by", 12) * framework::dataset::make("block", 4);
+const auto data_matrix_multiply = framework::dataset::make("M", 12, 20) * framework::dataset::make("N", 12, 20) * framework::dataset::make("K", 16);
 } // namespace
 
 TEST_SUITE(NEON)
-TEST_SUITE(GEMMLowp)
+TEST_SUITE(ASSEMBLY_MATRIX_MULTIPLY)
 
-TEST_SUITE(U8)
-using NEGEMMLowpOffsetFixture = GEMMLowpOffsetValidationFixture<Tensor, Accessor, NEGEMMLowp>;
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpOffsetFixture, framework::DatasetMode::PRECOMMIT, data_mnk *data_offsets)
+using NEGEMMAssemblyFixture_S8 = GEMMLowpAssemblyFixture<Tensor, Accessor, NEGEMMLowpAssemblyMatrixMultiplyCore, int8_t>;
+using NEGEMMAssemblyFixture_U8 = GEMMLowpAssemblyFixture<Tensor, Accessor, NEGEMMLowpAssemblyMatrixMultiplyCore, uint8_t>;
+
+TEST_SUITE(S8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMAssemblyFixture_S8, framework::DatasetMode::PRECOMMIT, data_matrix_multiply)
 {
     // Validate output
-    validate(Accessor(_target), _reference, tolerance_f);
+    validate(Accessor(_target), _reference);
 }
 TEST_SUITE_END()
 
+TEST_SUITE(U8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMAssemblyFixture_U8, framework::DatasetMode::PRECOMMIT, data_matrix_multiply)
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
 TEST_SUITE_END()
 TEST_SUITE_END()
+
+TEST_SUITE(GEMMLowp)
+
+TEST_SUITE(INTERLEAVE_BLOCKED)
+
+using NEInterleaveBlocked            = NESynthetizeFunction<NEGEMMInterleaveBlockedKernel>;
+using NEGEMMInterleaveBlockedFixture = GEMMInterleaveBlockedValidationFixture<Tensor, Accessor, NEInterleaveBlocked>;
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMInterleaveBlockedFixture, framework::DatasetMode::PRECOMMIT, data_int_blk)
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(INTERLEAVE_BLOCKED_TRANSPOSED)
+using NEInterleaveBlockedTransposed            = NESynthetizeFunction<NEGEMMInterleaveBlockedKernel>;
+using NEGEMMInterleaveBlockedTransposedFixture = GEMMInterleaveBlockedValidationFixture<Tensor, Accessor, NEInterleaveBlockedTransposed, true>;
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMInterleaveBlockedTransposedFixture, framework::DatasetMode::PRECOMMIT, data_int_blk_tr)
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+TEST_SUITE_END()
+
+TEST_SUITE(MatrixMultiplyCore)
+using NEGEMMLowpMatrixMultiplyCoreFixture = GEMMLowpMatrixMultiplyCoreValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>;
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, framework::dataset::concat(datasets::SmallGEMMLowpDataset(), datasets::LargeGEMMLowpDataset()),
+               shape_a, shape_b, shape_c, a_offset, b_offset)
+{
+    // Create tensors
+    Tensor a = create_tensor<Tensor>(shape_a, DataType::QASYMM8);
+    Tensor b = create_tensor<Tensor>(shape_b, DataType::QASYMM8);
+    Tensor c = create_tensor<Tensor>(shape_c, DataType::S32);
+
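+    // Attach asymmetric quantization info (scale, zero-point offset) to the QASYMM8 inputs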
+    a.info()->set_quantization_info(QuantizationInfo(1.0f / 255, a_offset));
+    b.info()->set_quantization_info(QuantizationInfo(1.0f / 255, b_offset));
+
+    ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    NEGEMMLowpMatrixMultiplyCore gemmlowp_mm;
+    gemmlowp_mm.configure(&a, &b, &c);
+}
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+    framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Input not a multiple of 4
+                                             TensorInfo(TensorShape(21U, 13U), 1, DataType::QS8, 2),                                 // Mismatching data type
+                                             TensorInfo(TensorShape(20U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions
+                                             TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions
+                                             TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)),
+                                          }),
+    framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
+                                            TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
+                                            TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
+                                            TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
+                                            TensorInfo(TensorShape(64U, 16U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
+                                          })),
+    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(33U, 13U), 1, DataType::S32),
+                                            TensorInfo(TensorShape(33U, 13U), 1, DataType::S32),
+                                            TensorInfo(TensorShape(33U, 13U), 1, DataType::S32),
+                                            TensorInfo(TensorShape(8U, 11U), 1, DataType::S32),
+                                            TensorInfo(TensorShape(64U, 32U), 1, DataType::S32),
+                                           })),
+    framework::dataset::make("Expected", { false, false, false, false, true })),
+    a_info, b_info, output_info, expected)
+{
+    // Lock tensor shapes and validate the configuration
+    Status status = NEGEMMLowpMatrixMultiplyCore::validate(&a_info.clone()->set_is_resizable(false),
+                                                           &b_info.clone()->set_is_resizable(false),
+                                                           &output_info.clone()->set_is_resizable(false));
+    ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset())
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpDataset())
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+TEST_SUITE_END() // MatrixMultiplyCore
+
+TEST_SUITE(OutputStage)
+
+TEST_SUITE(QuantizeDownInt32ToUint8Scale)
+
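+// In the test framework's dataset DSL, '*' builds the cartesian product of datasets and
+// make("Name", start, end) generates the half-open integer range [start, end).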
+const auto quantize_down_int32_to_uint8_scale_cases = framework::dataset::make("result_offset", -2, 1)
+                                                      * framework::dataset::make("result_mult_int", 1, 2)
+                                                      * framework::dataset::make("result_shift", 2, 3)
+                                                      * framework::dataset::make("min", 0)
+                                                      * framework::dataset::make("max", 0)
+                                                      * framework::dataset::make("addBias", { false, true });
+
+const auto quantize_down_int32_to_uint8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1)
+                                                           * framework::dataset::make("result_mult_int", 1, 2)
+                                                           * framework::dataset::make("result_shift", 2, 3)
+                                                           * framework::dataset::make("min", 0, 2)
+                                                           * framework::dataset::make("max", 171, 174)
+                                                           * framework::dataset::make("addBias", { false, true });
+
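+// The *_relu_cases above use non-trivial min/max bounds so the output stage also clamps the requantized result;
+// this is how a bounded ReLU is fused into the output stage (see the BoundedReLu suite below).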
+using NEGEMMLowpQuantizeDownInt32ToUint8ScaleFixture = GEMMLowpQuantizeDownInt32ToUint8ScaleValidationFixture<Tensor, Accessor, NEGEMMLowpQuantizeDownInt32ToUint8Scale>;
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
+    framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Input not a multiple of 16
+                                             TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Invalid min and max
+                                             TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type
+                                          }),
+    framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32),
+                                            TensorInfo(TensorShape(21U), 1, DataType::S32),
+                                            TensorInfo(TensorShape(20U), 1, DataType::S32),
+                                          })),
+    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8),
+                                            TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8),
+                                            TensorInfo(TensorShape(20U, 13U), 1, DataType::S32),
+                                           })),
+    framework::dataset::make("Min",{        0,
+                                            8,
+                                            13,
+                                           })),
+    framework::dataset::make("Max",{        205,
+                                            300,
+                                            180,
+                                           })),
+    framework::dataset::make("Expected", { true, false, false })),
+    a_info, b_info, output_info, min, max, expected)
+{
+    // Lock tensor shapes and validate the configuration
+    Status status = NEGEMMLowpQuantizeDownInt32ToUint8Scale::validate(&a_info.clone()->set_is_resizable(false),
+                                                                      &b_info.clone()->set_is_resizable(false),
+                                                                      &output_info.clone()->set_is_resizable(false),
+                                                                      min,
+                                                                      max);
+    ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), quantize_down_int32_to_uint8_scale_cases),
+               shape, result_offset, result_mult_int, result_shift, min, max, add_bias)
+{
+    TensorShape shape_bias(shape[0]);
+
+    // Create tensors
+    Tensor in   = create_tensor<Tensor>(shape, DataType::S32);
+    Tensor bias = create_tensor<Tensor>(shape_bias, DataType::S32);
+    Tensor out  = create_tensor<Tensor>(shape, DataType::QASYMM8);
+
+    ARM_COMPUTE_EXPECT(in.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(out.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    NEGEMMLowpQuantizeDownInt32ToUint8Scale output_stage;
+    output_stage.configure(&in, add_bias ? &bias : nullptr, &out, result_offset, result_mult_int, result_shift, min, max);
+
+    // Validate valid region input and output
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(in.info()->valid_region(), valid_region);
+    validate(out.info()->valid_region(), valid_region);
+
+    // Validate valid region bias
+    if(add_bias)
+    {
+        const ValidRegion valid_region_bias = shape_to_valid_region(shape_bias);
+        validate(bias.info()->valid_region(), valid_region_bias);
+    }
+
+    // Validate padding
+    const PaddingSize padding(0);
+    validate(in.info()->padding(), padding);
+    validate(out.info()->padding(), padding);
+
+    if(add_bias)
+    {
+        validate(bias.info()->padding(), padding);
+    }
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ToUint8ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_cases))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpQuantizeDownInt32ToUint8ScaleFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), quantize_down_int32_to_uint8_scale_cases))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+TEST_SUITE(BoundedReLu)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ToUint8ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_relu_cases))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpQuantizeDownInt32ToUint8ScaleFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), quantize_down_int32_to_uint8_scale_relu_cases))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // BoundedReLu
+
+TEST_SUITE_END() // QuantizeDownInt32ToUint8Scale
+
+TEST_SUITE(QuantizeDownInt32ToUint8ScaleByFixedPoint)
+
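+// The fixed-point output stage requantizes each S32 accumulator gemmlowp-style: multiply by
+// result_fixedpoint_multiplier (a Q0.31 value), arithmetic-shift right by result_shift, add
+// result_offset_after_shift and saturate to uint8.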
+const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602)
+                                                                    * framework::dataset::make("result_shift", 1, 2)
+                                                                    * framework::dataset::make("result_offset_after_shift", 2, 3)
+                                                                    * framework::dataset::make("min", 0)
+                                                                    * framework::dataset::make("max", 0)
+                                                                    * framework::dataset::make("addBias", { false, true });
+
+const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602)
+                                                                         * framework::dataset::make("result_shift", 1, 2)
+                                                                         * framework::dataset::make("result_offset_after_shift", 2, 3)
+                                                                         * framework::dataset::make("min", 0, 2)
+                                                                         * framework::dataset::make("max", 171, 174)
+                                                                         * framework::dataset::make("addBias", { false, true });
+
+using NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture =
+    GEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointValidationFixture<Tensor, Accessor, NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint>;
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
+    framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Input not a multiple of 16
+                                             TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Invalid min and max
+                                             TensorInfo(TensorShape(20U, 13U), 1, DataType::S32), // Wrong output data type
+                                          }),
+    framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(21U), 1, DataType::S32),
+                                            TensorInfo(TensorShape(21U), 1, DataType::S32),
+                                            TensorInfo(TensorShape(20U), 1, DataType::S32),
+                                          })),
+    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8),
+                                            TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8),
+                                            TensorInfo(TensorShape(20U, 13U), 1, DataType::S32),
+                                           })),
+    framework::dataset::make("Min",{        0,
+                                            8,
+                                            13,
+                                           })),
+    framework::dataset::make("Max",{        205,
+                                            300,
+                                            180,
+                                           })),
+    framework::dataset::make("Expected", { true, false, false })),
+    a_info, b_info, output_info, min, max, expected)
+{
+    // Lock tensor shapes and validate the configuration
+    Status status = NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::validate(&a_info.clone()->set_is_resizable(false),
+                                                                                  &b_info.clone()->set_is_resizable(false),
+                                                                                  &output_info.clone()->set_is_resizable(false),
+                                                                                  min,
+                                                                                  max);
+    ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()),
+                                                                   quantize_down_int32_to_uint8_scale_by_fixedpoint_cases),
+               shape, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max, add_bias)
+{
+    TensorShape shape_bias(shape[0]);
+
+    // Create tensors
+    Tensor in   = create_tensor<Tensor>(shape, DataType::S32);
+    Tensor bias = create_tensor<Tensor>(shape_bias, DataType::S32);
+    Tensor out  = create_tensor<Tensor>(shape, DataType::QASYMM8);
+
+    ARM_COMPUTE_EXPECT(in.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(out.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint output_stage;
+    output_stage.configure(&in, add_bias ? &bias : nullptr, &out, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
+
+    // Validate valid region input and output
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(in.info()->valid_region(), valid_region);
+    validate(out.info()->valid_region(), valid_region);
+
+    // Validate valid region bias
+    if(add_bias)
+    {
+        const ValidRegion valid_region_bias = shape_to_valid_region(shape_bias);
+        validate(bias.info()->valid_region(), valid_region_bias);
+    }
+
+    // Validate padding
+    const PaddingSize padding(0);
+    validate(in.info()->padding(), padding);
+    validate(out.info()->padding(), padding);
+
+    if(add_bias)
+    {
+        validate(bias.info()->padding(), padding);
+    }
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
+                       quantize_down_int32_to_uint8_scale_by_fixedpoint_cases))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(),
+                       quantize_down_int32_to_uint8_scale_by_fixedpoint_cases))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+TEST_SUITE(BoundedReLu)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
+                       quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(),
+                       quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // BoundedReLu
+
+TEST_SUITE_END() // QuantizeDownInt32ToUint8ScaleByFixedPoint
+TEST_SUITE_END() // OutputStage
+
+TEST_SUITE_END() // GEMMLowp
+TEST_SUITE_END() // NEON
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/NEON/GaussianPyramid.cpp b/tests/validation/NEON/GaussianPyramid.cpp
new file mode 100644
index 0000000..a9130cf
--- /dev/null
+++ b/tests/validation/NEON/GaussianPyramid.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+*/
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/GaussianPyramidHalfFixture.h"
+#include "tests/validation/reference/Utils.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr AbsoluteTolerance<float> tolerance_fp32(1.0f); /**< Tolerance value for comparing reference's output against implementation's output */
+
+const auto small_gaussian_pyramid_levels = combine(datasets::Medium2DShapes(), datasets::BorderModes()) * framework::dataset::make("numlevels", 2, 3);
+const auto large_gaussian_pyramid_levels = combine(datasets::Large2DShapes(), datasets::BorderModes()) * framework::dataset::make("numlevels", 2, 5);
+
+template <typename T, typename U>
+inline void validate_gaussian_pyramid(const Pyramid &target, const std::vector<SimpleTensor<T>> &reference, BorderMode border_mode, U tolerance)
+{
+    ValidRegion prev_valid_region = shape_to_valid_region(reference[0].shape());
+
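+    // Level 0 is the input image itself, so validation starts at level 1; each SCALE_PYRAMID_HALF level halves the previous dimensions.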
+    for(size_t i = 1; i < reference.size(); ++i)
+    {
+        const ValidRegion valid_region = shape_to_valid_region_gaussian_pyramid_half(reference[i - 1].shape(), prev_valid_region, (border_mode == BorderMode::UNDEFINED));
+
+        // Validate outputs
+        validate(Accessor(*(target.get_pyramid_level(i))), reference[i], valid_region, tolerance);
+
+        // Keep the valid region for the next level
+        prev_valid_region = valid_region;
+    }
+}
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(GaussianPyramid)
+TEST_SUITE(Half)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, large_gaussian_pyramid_levels,
+               shape, border_mode, num_levels)
+{
+    Tensor src = create_tensor<Tensor>(shape, DataType::U8);
+
+    // Create pyramid
+    PyramidInfo pyramid_info(num_levels, SCALE_PYRAMID_HALF, shape, Format::U8);
+    Pyramid     dst;
+    dst.init(pyramid_info);
+
+    NEGaussianPyramidHalf gaussian_pyramid_half;
+    gaussian_pyramid_half.configure(&src, &dst, border_mode, 0);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    for(size_t level = 0; level < pyramid_info.num_levels(); ++level)
+    {
+        ARM_COMPUTE_EXPECT(dst.get_pyramid_level(level)->info()->is_resizable(), framework::LogLevel::ERRORS);
+    }
+}
+
+template <typename T>
+using NEGaussianPyramidHalfFixture = GaussianPyramidHalfValidationFixture<Tensor, Accessor, NEGaussianPyramidHalf, T, Pyramid>;
+
+FIXTURE_DATA_TEST_CASE(RunSmallGaussianPyramidHalf, NEGaussianPyramidHalfFixture<uint8_t>, framework::DatasetMode::ALL, small_gaussian_pyramid_levels)
+{
+    validate_gaussian_pyramid(_target, _reference, _border_mode, tolerance_fp32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeGaussianPyramidHalf, NEGaussianPyramidHalfFixture<uint8_t>, framework::DatasetMode::NIGHTLY, large_gaussian_pyramid_levels)
+{
+    validate_gaussian_pyramid(_target, _reference, _border_mode, tolerance_fp32);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/HarrisCorners.cpp b/tests/validation/NEON/HarrisCorners.cpp
index 6d66549..fa8d3cb 100644
--- a/tests/validation/NEON/HarrisCorners.cpp
+++ b/tests/validation/NEON/HarrisCorners.cpp
@@ -44,11 +44,16 @@
 {
 namespace
 {
+/* Allowed percentage of keypoints missing for target */
+const float allowed_missing_percentage = 10.f;
+/* Allowed percentage of keypoints mismatching between target and reference */
+const float allowed_mismatch_percentage = 10.f;
+
 const auto use_fp16 = framework::dataset::make("UseFP16",
 {
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
     true,
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
     false
 });
 
@@ -106,14 +111,26 @@
 {
     // Validate output
     ArrayAccessor<KeyPoint> array(_target);
-    validate_keypoints(array.buffer(), array.buffer() + array.num_values(), _reference.begin(), _reference.end(), RelativeTolerance<float>(0.0001f));
+    validate_keypoints(array.buffer(),
+                       array.buffer() + array.num_values(),
+                       _reference.begin(),
+                       _reference.end(),
+                       RelativeTolerance<float>(0.0001f),
+                       allowed_missing_percentage,
+                       allowed_mismatch_percentage);
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, NEHarrisCornersFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large2DShapes(), data), framework::dataset::make("Format", Format::U8)))
 {
     // Validate output
     ArrayAccessor<KeyPoint> array(_target);
-    validate_keypoints(array.buffer(), array.buffer() + array.num_values(), _reference.begin(), _reference.end(), RelativeTolerance<float>(0.0001f));
+    validate_keypoints(array.buffer(),
+                       array.buffer() + array.num_values(),
+                       _reference.begin(),
+                       _reference.end(),
+                       RelativeTolerance<float>(0.0001f),
+                       allowed_missing_percentage,
+                       allowed_mismatch_percentage);
 }
 
 TEST_SUITE_END()
diff --git a/tests/validation/NEON/Histogram.cpp b/tests/validation/NEON/Histogram.cpp
new file mode 100644
index 0000000..d2b53b7
--- /dev/null
+++ b/tests/validation/NEON/Histogram.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/Distribution1D.h"
+#include "arm_compute/runtime/NEON/functions/NEHistogram.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/HistogramFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(NEON)
+TEST_SUITE(Histogram)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()),
+                                                                   framework::dataset::make("DataType", DataType::U8)),
+               shape, data_type)
+{
+    // Setup Distribution
+    std::mt19937                            gen(library->seed());
+    std::uniform_int_distribution<size_t>   distribution_size_t(1, 30);
+    const size_t                            num_bins = distribution_size_t(gen);
+    std::uniform_int_distribution<int32_t>  distribution_int32_t(0, 125);
+    const size_t                            offset = distribution_int32_t(gen);
+    std::uniform_int_distribution<uint32_t> distribution_uint32_t(1, 255 - offset);
+    const size_t                            range = distribution_uint32_t(gen);
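+    // Distribution1D(num_bins, offset, range): histogram of the pixel values in [offset, offset + range), split across num_bins bins.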
+    Distribution1D                          distribution_dst(num_bins, offset, range);
+
+    // Create tensors
+    Tensor      src = create_tensor<Tensor>(shape, data_type);
+    TensorShape dst_shape(num_bins);
+    Tensor      dst = create_tensor<Tensor>(dst_shape, DataType::U32);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    NEHistogram histogram;
+    histogram.configure(&src, &distribution_dst);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(src.info()->valid_region(), valid_region);
+    const ValidRegion valid_region_dst = shape_to_valid_region(dst_shape);
+    validate(dst.info()->valid_region(), valid_region_dst);
+
+    // Validate padding
+    const PaddingSize padding;
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+template <typename T>
+using NEHistogramFixture = HistogramValidationFixture<Tensor, Accessor, NEHistogram, T, Distribution1D>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEHistogramFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
+                                                                                                         DataType::U8)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEHistogramFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
+                                                                                                       DataType::U8)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/Im2Col.cpp b/tests/validation/NEON/Im2Col.cpp
new file mode 100644
index 0000000..4faa7d7
--- /dev/null
+++ b/tests/validation/NEON/Im2Col.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEIm2Col.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(NEON)
+TEST_SUITE(Im2Col)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(10U, 12U, 2U), 1, DataType::U8),      // Unsupported data type
+                                                       TensorInfo(TensorShape(10U, 12U, 2U), 1, DataType::F32),     // Mismatching data type
+                                                       TensorInfo(TensorShape(10U, 12U, 2U), 1, DataType::QS8, 2),  // Mismatching fixed point
+                                                       TensorInfo(TensorShape(10U, 12U, 2U), 1, DataType::QASYMM8), // Bias not supported with QASYMM8
+                                                       TensorInfo(TensorShape(10U, 12U, 2U), 1, DataType::QASYMM8),
+                                                     }),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(3U, 4U, 10U, 2U), 1, DataType::F16),
+                                                       TensorInfo(TensorShape(3U, 4U, 10U, 2U), 1, DataType::F16),
+                                                       TensorInfo(TensorShape(3U, 4U, 10U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(3U, 3U, 10U, 2U), 1, DataType::QASYMM8),
+                                                       TensorInfo(TensorShape(3U, 4U, 10U, 2U), 1, DataType::QASYMM8),
+                                                     })),
+               framework::dataset::make("HasBias", { true, true, true, true, false })),
+               framework::dataset::make("Expected", { false, false, false, false, true })),
+               input_info, output_info, has_bias, expected)
+{
+    bool is_valid = bool(NEIm2Col::validate(&input_info, &output_info, Size2D(3U, 3U), PadStrideInfo(), has_bias));
+    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/L2Normalize.cpp b/tests/validation/NEON/L2NormalizeLayer.cpp
similarity index 84%
rename from tests/validation/NEON/L2Normalize.cpp
rename to tests/validation/NEON/L2NormalizeLayer.cpp
index ceffa6d..c0f5920 100644
--- a/tests/validation/NEON/L2Normalize.cpp
+++ b/tests/validation/NEON/L2NormalizeLayer.cpp
@@ -22,7 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/functions/NEL2Normalize.h"
+#include "arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h"
 #include "arm_compute/runtime/Tensor.h"
 #include "arm_compute/runtime/TensorAllocator.h"
 #include "tests/NEON/Accessor.h"
@@ -32,7 +32,7 @@
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
 #include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/L2NormalizeFixture.h"
+#include "tests/validation/fixtures/L2NormalizeLayerFixture.h"
 
 namespace arm_compute
 {
@@ -47,20 +47,20 @@
 } // namespace
 
 TEST_SUITE(NEON)
-TEST_SUITE(L2Normalize)
+TEST_SUITE(L2NormalizeLayer)
 
 template <typename T>
-using NEL2NormalizeFixture = L2NormalizeValidationFixture<Tensor, Accessor, NEL2Normalize, T>;
+using NEL2NormalizeLayerFixture = L2NormalizeLayerValidationFixture<Tensor, Accessor, NEL2NormalizeLayer, T>;
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEL2NormalizeFixture<float>, framework::DatasetMode::PRECOMMIT,
+FIXTURE_DATA_TEST_CASE(RunSmall, NEL2NormalizeLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
                        combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), framework::dataset::make("Epsilon", { 1e-12 })))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_f32);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, NEL2NormalizeFixture<float>, framework::DatasetMode::NIGHTLY,
+FIXTURE_DATA_TEST_CASE(RunLarge, NEL2NormalizeLayerFixture<float>, framework::DatasetMode::NIGHTLY,
                        combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), framework::dataset::make("Epsilon", { 1e-12 })))
 {
     // Validate output
diff --git a/tests/validation/NEON/Magnitude.cpp b/tests/validation/NEON/Magnitude.cpp
new file mode 100644
index 0000000..3b7562b
--- /dev/null
+++ b/tests/validation/NEON/Magnitude.cpp
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEMagnitude.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/MagnitudeFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+template <typename T>
+AbsoluteTolerance<T> tolerance(MagnitudeType magnitude_type)
+{
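+    // L1NORM (|gx| + |gy|) is exact in integer arithmetic, whereas L2NORM takes sqrt(gx^2 + gy^2), so allow 1 unit of error.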
+    return AbsoluteTolerance<T>((MagnitudeType::L1NORM == magnitude_type) ? 0 : 1);
+}
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+template <>
+AbsoluteTolerance<half_float::half> tolerance(MagnitudeType magnitude_type)
+{
+    return AbsoluteTolerance<half_float::half>((MagnitudeType::L1NORM == magnitude_type) ? half(0.0) : half(1.0));
+}
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(Magnitude)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::S16)), shape, data_type)
+{
+    // Create tensors
+    Tensor src1 = create_tensor<Tensor>(shape, data_type);
+    Tensor src2 = create_tensor<Tensor>(shape, data_type);
+    Tensor dst  = create_tensor<Tensor>(shape, data_type);
+
+    ARM_COMPUTE_EXPECT(src1.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(src2.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function (default MagnitudeType::L2NORM)
+    NEMagnitude magnitude;
+    magnitude.configure(&src1, &src2, &dst);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src1.info()->padding(), padding);
+    validate(src2.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+template <typename T>
+using NEMagnitudeFixture = MagnitudeValidationFixture<Tensor, Accessor, NEMagnitude, T>;
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEMagnitudeFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), framework::dataset::make("Format", Format::S16)),
+                                                                                                                 framework::dataset::make("MagnitudeType", { MagnitudeType::L1NORM, MagnitudeType::L2NORM })),
+                                                                                                         framework::dataset::make("UseFP16", false)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance<int16_t>(_magnitude_type));
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEMagnitudeFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), framework::dataset::make("Format", Format::S16)),
+                                                                                                               framework::dataset::make("MagnitudeType", { MagnitudeType::L1NORM, MagnitudeType::L2NORM })),
+                                                                                                       framework::dataset::make("UseFP16", false)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance<int16_t>(_magnitude_type));
+}
+TEST_SUITE_END() // S16
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+TEST_SUITE(F16)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEMagnitudeFixture<half_float::half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), framework::dataset::make("Format",
+                                                                                                                  Format::S16)),
+                                                                                                                  framework::dataset::make("MagnitudeType", { MagnitudeType::L1NORM, MagnitudeType::L2NORM })),
+                                                                                                                  framework::dataset::make("UseFP16", true)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance<half_float::half>(_magnitude_type));
+}
+TEST_SUITE_END() // F16
+#endif           /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/Median3x3.cpp b/tests/validation/NEON/Median3x3.cpp
new file mode 100644
index 0000000..89c8c41
--- /dev/null
+++ b/tests/validation/NEON/Median3x3.cpp
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEMedian3x3.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/Median3x3Fixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr unsigned int filter_size = 3;              /* Size of the kernel/filter in number of elements. */
+constexpr BorderSize   border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(Median3x3)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::U8)),
+                                                                   datasets::BorderModes()),
+               shape, data_type, border_mode)
+{
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, data_type);
+    Tensor dst = create_tensor<Tensor>(shape, data_type);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    NEMedian3x3 median3x3;
+    median3x3.configure(&src, &dst, border_mode);
+
+    // Validate valid region
+    const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), border_size);
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding
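+    // The calculator mirrors the kernel's access pattern assumed here: 8 output pixels processed per iteration,
+    // 16 input pixels read per iteration starting one pixel to the left (offset -1).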
+    PaddingCalculator calculator(shape.x(), 8);
+    calculator.set_border_size(1);
+    calculator.set_border_mode(border_mode);
+
+    const PaddingSize dst_padding = calculator.required_padding();
+
+    calculator.set_accessed_elements(16);
+    calculator.set_access_offset(-1);
+
+    const PaddingSize src_padding = calculator.required_padding();
+
+    validate(src.info()->padding(), src_padding);
+    validate(dst.info()->padding(), dst_padding);
+}
+
+template <typename T>
+using NEMedian3x3Fixture = Median3x3ValidationFixture<Tensor, Accessor, NEMedian3x3, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEMedian3x3Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::U8)),
+                                                                                                         datasets::BorderModes()))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEMedian3x3Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                               DataType::U8)),
+                                                                                                       datasets::BorderModes()))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/NormalizationLayer.cpp b/tests/validation/NEON/NormalizationLayer.cpp
index e0098ad..3654473 100644
--- a/tests/validation/NEON/NormalizationLayer.cpp
+++ b/tests/validation/NEON/NormalizationLayer.cpp
@@ -44,27 +44,67 @@
 namespace
 {
 /** Tolerance for float operations */
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 constexpr AbsoluteTolerance<float> tolerance_f16(0.001f);
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 constexpr AbsoluteTolerance<float> tolerance_f32(0.00001f);
 /** Tolerance for fixed point operations */
 constexpr AbsoluteTolerance<int8_t>  tolerance_qs8(2);
 constexpr AbsoluteTolerance<int16_t> tolerance_qs16(4);
 
 /** Input data set. */
-const auto NormalizationDataset = combine(combine(combine(datasets::SmallShapes(), datasets::NormalizationTypes()), framework::dataset::make("NormalizationSize", 3, 9, 2)),
-                                          framework::dataset::make("Beta", { 0.5f, 1.f, 2.f }));
+const auto NormalizationDataset = combine(combine(combine(combine(datasets::SmallShapes(), datasets::NormalizationTypes()), framework::dataset::make("NormalizationSize", 3, 9, 2)),
+                                                  framework::dataset::make("Beta", { 0.5f, 1.f, 2.f })),
+                                          framework::dataset::make("IsScaled", { true }));
+const auto NormalizationDatasetFP32 = combine(combine(combine(combine(datasets::SmallShapes(), datasets::NormalizationTypes()), framework::dataset::make("NormalizationSize", 3, 9, 2)),
+                                                      framework::dataset::make("Beta", { 0.5f, 1.f, 2.f })),
+                                              framework::dataset::make("IsScaled", { true, false }));
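+// "IsScaled" is presumably forwarded to NormalizationLayerInfo's is_scaled flag (whether alpha is divided by the
+// normalization window size); only the FP32 runs exercise both settings.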
 } // namespace
 
 TEST_SUITE(NEON)
 TEST_SUITE(NormalizationLayer)
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+    framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching data type input/output
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching shapes
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Even normalization
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Non implemented IN_MAP_2D
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 4), // Mismatching fixed point position
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Window shrink
+                                            TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32, 0),
+                                          }),
+    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16, 0),
+                                            TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 3),
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32, 0),
+                                          })),
+    framework::dataset::make("NormInfo",  { NormalizationLayerInfo(NormType::IN_MAP_1D, 5),
+                                            NormalizationLayerInfo(NormType::IN_MAP_1D, 5),
+                                            NormalizationLayerInfo(NormType::IN_MAP_1D, 4),
+                                            NormalizationLayerInfo(NormType::IN_MAP_2D, 5),
+                                            NormalizationLayerInfo(NormType::IN_MAP_1D, 5),
+                                            NormalizationLayerInfo(NormType::IN_MAP_1D, 5),
+                                            NormalizationLayerInfo(NormType::CROSS_MAP, 1),
+                                           })),
+    framework::dataset::make("Expected", { false, false, false, false, false, false, true })),
+    input_info, output_info, norm_info, expected)
+{
+    bool is_valid = bool(NENormalizationLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), norm_info));
+    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
 template <typename T>
 using NENormalizationLayerFixture = NormalizationValidationFixture<Tensor, Accessor, NENormalizationLayer, T>;
 
 TEST_SUITE(Float)
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall, NENormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F16)))
 {
@@ -77,15 +117,15 @@
     validate(Accessor(_target), _reference, tolerance_f16);
 }
 TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NENormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, NENormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(NormalizationDatasetFP32, framework::dataset::make("DataType", DataType::F32)))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NENormalizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NENormalizationLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(NormalizationDatasetFP32, framework::dataset::make("DataType", DataType::F32)))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_f32);
diff --git a/tests/validation/NEON/Phase.cpp b/tests/validation/NEON/Phase.cpp
new file mode 100644
index 0000000..8e2ef23
--- /dev/null
+++ b/tests/validation/NEON/Phase.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEPhase.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/PhaseFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
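+// The computed phase angle is quantized into a U8 value, so allow 1 LSB of error; validate_wrap below additionally
+// accepts matches that differ only by the wrap-around of the angle.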
+constexpr AbsoluteTolerance<uint8_t> tolerance_value(1);
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(Phase)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::S16)), shape, data_type)
+{
+    // Create tensors
+    Tensor src1 = create_tensor<Tensor>(shape, data_type);
+    Tensor src2 = create_tensor<Tensor>(shape, data_type);
+    Tensor dst  = create_tensor<Tensor>(shape, DataType::U8);
+
+    ARM_COMPUTE_EXPECT(src1.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(src2.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    NEPhase phase;
+    phase.configure(&src1, &src2, &dst);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src1.info()->padding(), padding);
+    validate(src2.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+template <typename T>
+using NEPhaseFixture = PhaseValidationFixture<Tensor, Accessor, NEPhase, T>;
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPhaseFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::Small2DShapes(), framework::dataset::make("Format", Format::S16)),
+                                                                                                     framework::dataset::make("PhaseType", { PhaseType::UNSIGNED, PhaseType::SIGNED })))
+{
+    // Validate output
+    validate_wrap(Accessor(_target), _reference, tolerance_value, 0);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEPhaseFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large2DShapes(), framework::dataset::make("Format", Format::S16)),
+                                                                                                   framework::dataset::make("PhaseType", { PhaseType::UNSIGNED, PhaseType::SIGNED })))
+{
+    // Validate output
+    validate_wrap(Accessor(_target), _reference, tolerance_value, 0);
+}
+TEST_SUITE_END() // S16
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/PixelWiseMultiplication.cpp b/tests/validation/NEON/PixelWiseMultiplication.cpp
new file mode 100644
index 0000000..44b4ff2
--- /dev/null
+++ b/tests/validation/NEON/PixelWiseMultiplication.cpp
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ConvertPolicyDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/PixelWiseMultiplicationFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+const float scale_unity = 1.f;
+const float scale_255   = 1.f / 255.f;
+const float scale_other = 1.f / 32768.f;
+
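+// Validation helpers: DEFAULT_VALIDATE compares against the reference exactly, VALIDATE allows an
+// absolute tolerance, and WRAP_VALIDATE additionally accepts results that differ by a wrap-around.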
+#define DEFAULT_VALIDATE validate(Accessor(_target), _reference);
+#define VALIDATE(TYPE, TOLERANCE) validate(Accessor(_target), _reference, AbsoluteTolerance<TYPE>(TOLERANCE), 0.f);
+#define WRAP_VALIDATE(TYPE, TOLERANCE) validate_wrap(Accessor(_target), _reference, AbsoluteTolerance<TYPE>(TOLERANCE), 0.f);
+
+// *INDENT-OFF*
+// clang-format off
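+// Expands to a Configuration DATA_TEST_CASE over small and large shapes for the given input data
+// types, scale, all convert policies and the given rounding policy.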
+#define PIXEL_WISE_MULTIPLICATION_DATA_TEST_CASE(DT1, DT2, SCALE, RP)                                            \
+    DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL,                                                   \
+                   combine(combine(combine(combine(combine(                                                      \
+                   concat(datasets::SmallShapes(), datasets::LargeShapes()),                                     \
+                   framework::dataset::make("DataType1", DataType::DT1)),                                        \
+                   framework::dataset::make("DataType2", DataType::DT2)),                                        \
+                   framework::dataset::make("Scale", std::move(SCALE))),                                         \
+                   datasets::ConvertPolicies()),                                                                 \
+                   framework::dataset::make("RoundingPolicy", RoundingPolicy::RP)),                              \
+                   shape, dt1, dt2, scale, convert_policy, rounding_policy)                                      \
+    {                                                                                                            \
+        validate_configuration(shape, dt1, dt2, scale, convert_policy, rounding_policy);                         \
+    }
+
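+// Expands to a FIXTURE_DATA_TEST_CASE that runs the given fixture over the given shapes, data
+// types, scale, all convert policies and rounding policy, then checks the output with VALIDATE.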
+#define PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(TEST_NAME, FIXTURE, MODE, SHAPES, DT1, DT2, SCALE, RP, VALIDATE) \
+    FIXTURE_DATA_TEST_CASE(TEST_NAME, NEPixelWiseMultiplication##FIXTURE, framework::DatasetMode::MODE,                   \
+                           combine(combine(combine(combine(combine(                                                       \
+                           datasets::SHAPES,                                                                              \
+                           framework::dataset::make("DataType1", DataType::DT1)),                                         \
+                           framework::dataset::make("DataType2", DataType::DT2)),                                         \
+                           framework::dataset::make("Scale", std::move(SCALE))),                                          \
+                           datasets::ConvertPolicies()),                                                                  \
+                           framework::dataset::make("RoundingPolicy", RoundingPolicy::RP)))                               \
+    {                                                                                                                     \
+        VALIDATE                                                                                                          \
+    }
+// *INDENT-ON*
+// clang-format on
+
+void validate_configuration(TensorShape shape, DataType dt1, DataType dt2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy)
+{
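+    // Create tensors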
+    Tensor src1 = create_tensor<Tensor>(shape, dt1);
+    Tensor src2 = create_tensor<Tensor>(shape, dt2);
+    Tensor dst  = create_tensor<Tensor>(shape, dt2);
+
+    ARM_COMPUTE_EXPECT(src1.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(src2.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    NEPixelWiseMultiplication multiply;
+    multiply.configure(&src1, &src2, &dst, scale, convert_policy, rounding_policy);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(src1.info()->valid_region(), valid_region);
+    validate(src2.info()->valid_region(), valid_region);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src1.info()->padding(), padding);
+    validate(src2.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+} // namespace
+
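+// Fixture aliases; the trailing template argument selects the output data type.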
+template <typename T>
+using NEPixelWiseMultiplicationToU8Fixture = PixelWiseMultiplicationValidationFixture<Tensor, Accessor, NEPixelWiseMultiplication, T, uint8_t>;
+template <typename T>
+using NEPixelWiseMultiplicationToS16Fixture = PixelWiseMultiplicationValidationFixture<Tensor, Accessor, NEPixelWiseMultiplication, T, int16_t>;
+template <typename T>
+using NEPixelWiseMultiplicationToF16Fixture = PixelWiseMultiplicationValidationFixture<Tensor, Accessor, NEPixelWiseMultiplication, T, half_float::half>;
+template <typename T>
+using NEPixelWiseMultiplicationToF32Fixture = PixelWiseMultiplicationValidationFixture<Tensor, Accessor, NEPixelWiseMultiplication, T, float>;
+template <typename T>
+using NEPixelWiseMultiplicationToQS8Fixture = PixelWiseMultiplicationValidationFixture<Tensor, Accessor, NEPixelWiseMultiplication, T, qint8_t>;
+template <typename T>
+using NEPixelWiseMultiplicationToQS16Fixture = PixelWiseMultiplicationValidationFixture<Tensor, Accessor, NEPixelWiseMultiplication, T, qint16_t>;
+
+TEST_SUITE(NEON)
+TEST_SUITE(PixelWiseMultiplication)
+
+// *INDENT-OFF*
+// clang-format off
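+// Checks that NEPixelWiseMultiplication::validate() only accepts the tensor/scale combinations
+// marked as valid in the Expected column.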
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
+               framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),      // Window shrink
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid scale
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid data type combination
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),  // Mismatching data type
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),  // Mismatching fixed point
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),  // Invalid scale
+                                                      }),
+               framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
+                                                       TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS16, 2),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                     })),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 3),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QS8, 2),
+                                                     })),
+               framework::dataset::make("Scale",{  scale_unity, scale_unity, scale_unity, -1.f, scale_unity, scale_unity, scale_unity, scale_unity, 3.f})),
+               framework::dataset::make("Expected", { true, true, false, false, false, false, false, false, false })),
+               input1_info, input2_info, output_info, scale, expected)
+{
+    bool is_valid = bool(NEPixelWiseMultiplication::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), scale, ConvertPolicy::WRAP, RoundingPolicy::TO_ZERO));
+    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+TEST_SUITE(U8toU8)
+
+TEST_SUITE(Scale255)
+PIXEL_WISE_MULTIPLICATION_DATA_TEST_CASE(U8, U8, scale_255, TO_NEAREST_UP)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToU8Fixture<uint8_t>, PRECOMMIT, SmallShapes(), U8, U8, scale_255, TO_NEAREST_UP, WRAP_VALIDATE(uint8_t, 1))
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunLarge, ToU8Fixture<uint8_t>, NIGHTLY, LargeShapes(), U8, U8, scale_255, TO_NEAREST_UP, WRAP_VALIDATE(uint8_t, 1))
+TEST_SUITE_END() // Scale255
+
+TEST_SUITE(ScaleUnity)
+PIXEL_WISE_MULTIPLICATION_DATA_TEST_CASE(U8, U8, scale_unity, TO_ZERO)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToU8Fixture<uint8_t>, PRECOMMIT, SmallShapes(), U8, U8, scale_unity, TO_ZERO, DEFAULT_VALIDATE)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunLarge, ToU8Fixture<uint8_t>, NIGHTLY, LargeShapes(), U8, U8, scale_unity, TO_ZERO, DEFAULT_VALIDATE)
+TEST_SUITE_END() // ScaleUnity
+
+TEST_SUITE(ScaleOther)
+PIXEL_WISE_MULTIPLICATION_DATA_TEST_CASE(U8, U8, scale_other, TO_ZERO)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToU8Fixture<uint8_t>, PRECOMMIT, SmallShapes(), U8, U8, scale_other, TO_ZERO, DEFAULT_VALIDATE)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunLarge, ToU8Fixture<uint8_t>, NIGHTLY, LargeShapes(), U8, U8, scale_other, TO_ZERO, DEFAULT_VALIDATE)
+TEST_SUITE_END() // ScaleOther
+
+TEST_SUITE_END() // U8toU8
+
+TEST_SUITE(U8toS16)
+
+TEST_SUITE(Scale255)
+PIXEL_WISE_MULTIPLICATION_DATA_TEST_CASE(U8, S16, scale_255, TO_NEAREST_UP)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToS16Fixture<uint8_t>, PRECOMMIT, SmallShapes(), U8, S16, scale_255, TO_NEAREST_UP, WRAP_VALIDATE(int16_t, 2))
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunLarge, ToS16Fixture<uint8_t>, NIGHTLY, LargeShapes(), U8, S16, scale_255, TO_NEAREST_UP, WRAP_VALIDATE(int16_t, 2))
+TEST_SUITE_END() // Scale255
+
+TEST_SUITE(ScaleUnity)
+PIXEL_WISE_MULTIPLICATION_DATA_TEST_CASE(U8, S16, scale_unity, TO_ZERO)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToS16Fixture<uint8_t>, PRECOMMIT, SmallShapes(), U8, S16, scale_unity, TO_ZERO, DEFAULT_VALIDATE)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunLarge, ToS16Fixture<uint8_t>, NIGHTLY, LargeShapes(), U8, S16, scale_unity, TO_ZERO, DEFAULT_VALIDATE)
+TEST_SUITE_END() // ScaleUnity
+
+TEST_SUITE(ScaleOther)
+PIXEL_WISE_MULTIPLICATION_DATA_TEST_CASE(U8, S16, scale_other, TO_ZERO)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToS16Fixture<uint8_t>, PRECOMMIT, SmallShapes(), U8, S16, scale_other, TO_ZERO, DEFAULT_VALIDATE)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunLarge, ToS16Fixture<uint8_t>, NIGHTLY, LargeShapes(), U8, S16, scale_other, TO_ZERO, DEFAULT_VALIDATE)
+TEST_SUITE_END() // ScaleOther
+
+TEST_SUITE_END() // U8toS16
+
+TEST_SUITE(S16toS16)
+
+TEST_SUITE(Scale255)
+PIXEL_WISE_MULTIPLICATION_DATA_TEST_CASE(S16, S16, scale_255, TO_NEAREST_UP)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToS16Fixture<int16_t>, PRECOMMIT, SmallShapes(), S16, S16, scale_255, TO_NEAREST_UP, WRAP_VALIDATE(int16_t, 2))
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunLarge, ToS16Fixture<int16_t>, NIGHTLY, LargeShapes(), S16, S16, scale_255, TO_NEAREST_UP, WRAP_VALIDATE(int16_t, 2))
+TEST_SUITE_END() // Scale255
+
+TEST_SUITE(ScaleUnity)
+PIXEL_WISE_MULTIPLICATION_DATA_TEST_CASE(S16, S16, scale_unity, TO_ZERO)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToS16Fixture<int16_t>, PRECOMMIT, SmallShapes(), S16, S16, scale_unity, TO_ZERO, DEFAULT_VALIDATE)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunLarge, ToS16Fixture<int16_t>, NIGHTLY, LargeShapes(), S16, S16, scale_unity, TO_ZERO, DEFAULT_VALIDATE)
+TEST_SUITE_END() // ScaleUnity
+
+TEST_SUITE(ScaleOther)
+PIXEL_WISE_MULTIPLICATION_DATA_TEST_CASE(S16, S16, scale_other, TO_ZERO)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToS16Fixture<int16_t>, PRECOMMIT, SmallShapes(), S16, S16, scale_other, TO_ZERO, DEFAULT_VALIDATE)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunLarge, ToS16Fixture<int16_t>, NIGHTLY, LargeShapes(), S16, S16, scale_other, TO_ZERO, DEFAULT_VALIDATE)
+TEST_SUITE_END() // ScaleOther
+
+TEST_SUITE_END() // S16toS16
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+TEST_SUITE(F16toF16)
+
+TEST_SUITE(Scale255)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToF16Fixture<half_float::half>, PRECOMMIT, SmallShapes(), F16, F16, scale_255, TO_NEAREST_UP, VALIDATE(float, 1.f))
+TEST_SUITE_END() // Scale255
+
+TEST_SUITE_END() // F16toF16
+#endif           /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
+TEST_SUITE(F32toF32)
+
+TEST_SUITE(Scale255)
+PIXEL_WISE_MULTIPLICATION_DATA_TEST_CASE(F32, F32, scale_255, TO_NEAREST_UP)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToF32Fixture<float>, PRECOMMIT, SmallShapes(), F32, F32, scale_255, TO_NEAREST_UP, VALIDATE(float, 1.f))
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunLarge, ToF32Fixture<float>, NIGHTLY, LargeShapes(), F32, F32, scale_255, TO_NEAREST_UP, VALIDATE(float, 1.f))
+TEST_SUITE_END() // Scale255
+
+TEST_SUITE(ScaleUnity)
+PIXEL_WISE_MULTIPLICATION_DATA_TEST_CASE(F32, F32, scale_unity, TO_ZERO)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToF32Fixture<float>, PRECOMMIT, SmallShapes(), F32, F32, scale_unity, TO_ZERO, DEFAULT_VALIDATE)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunLarge, ToF32Fixture<float>, NIGHTLY, LargeShapes(), F32, F32, scale_unity, TO_ZERO, DEFAULT_VALIDATE)
+TEST_SUITE_END() // ScaleUnity
+
+TEST_SUITE(ScaleOther)
+PIXEL_WISE_MULTIPLICATION_DATA_TEST_CASE(F32, F32, scale_other, TO_ZERO)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToF32Fixture<float>, PRECOMMIT, SmallShapes(), F32, F32, scale_other, TO_ZERO, DEFAULT_VALIDATE)
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunLarge, ToF32Fixture<float>, NIGHTLY, LargeShapes(), F32, F32, scale_other, TO_ZERO, DEFAULT_VALIDATE)
+TEST_SUITE_END() // ScaleOther
+
+TEST_SUITE_END() // F32toF32
+
+TEST_SUITE_END() // PixelWiseMultiplication
+TEST_SUITE_END() // NEON
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/PoolingLayer.cpp b/tests/validation/NEON/PoolingLayer.cpp
index 591c4be..a71f5f2 100644
--- a/tests/validation/NEON/PoolingLayer.cpp
+++ b/tests/validation/NEON/PoolingLayer.cpp
@@ -44,17 +44,19 @@
 namespace
 {
 /** Input data set for float data types */
-const auto PoolingLayerDatasetFP = combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { 2, 3, 7, 9 })),
-                                           framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) }));
+const auto PoolingLayerDatasetFP = combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { 2, 3, 7, 9 })),
+                                                   framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })),
+                                           framework::dataset::make("ExcludePadding", { true, false }));
 
 /** Input data set for quantized data types */
-const auto PoolingLayerDatasetQS = combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), framework::dataset::make("PoolingSize", { 2, 3 })),
-                                           framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) }));
+const auto PoolingLayerDatasetQS = combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), framework::dataset::make("PoolingSize", { 2, 3 })),
+                                                   framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })),
+                                           framework::dataset::make("ExcludePadding", { false }));
 
 constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for float types */
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 constexpr AbsoluteTolerance<float> tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for float types */
-#endif                                                   /* ARM_COMPUTE_ENABLE_FP16 */
+#endif                                                   /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 constexpr AbsoluteTolerance<float> tolerance_qs8(0);     /**< Tolerance value for comparing reference's output against implementation's output for quantized input */
 constexpr AbsoluteTolerance<float> tolerance_qs16(0);    /**< Tolerance value for comparing reference's output against implementation's output for quantized input */
 } // namespace
@@ -62,6 +64,48 @@
 TEST_SUITE(NEON)
 TEST_SUITE(PoolingLayer)
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+    framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Mismatching data type
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Window shrink
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 4),     // Mismatching fixed point position
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS16, 11),   // Window shrink
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Invalid pad/size combination
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Invalid pad/size combination
+                                            TensorInfo(TensorShape(15U, 13U, 5U), 1, DataType::F32, 0),     // Non-rectangular Global Pooling
+                                            TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32, 0),     // Invalid output Global Pooling
+                                            TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32, 0),
+                                          }),
+    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16, 0),
+                                            TensorInfo(TensorShape(25U, 10U, 2U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::QS8, 5),
+                                            TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::QS16, 11),
+                                            TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(25U, 16U, 2U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(1U, 1U, 5U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(2U, 2U, 5U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 0),
+                                          })),
+    framework::dataset::make("PoolInfo",  { PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)),
+                                            PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)),
+                                            PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)),
+                                            PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)),
+                                            PoolingLayerInfo(PoolingType::AVG, 2, PadStrideInfo(1, 1, 2, 0)),
+                                            PoolingLayerInfo(PoolingType::AVG, 2, PadStrideInfo(1, 1, 0, 2)),
+                                            PoolingLayerInfo(PoolingType::AVG),
+                                            PoolingLayerInfo(PoolingType::MAX),
+                                            PoolingLayerInfo(PoolingType::AVG),
+                                           })),
+    framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false, true })),
+    input_info, output_info, pool_info, expected)
+{
+    bool is_valid = bool(NEPoolingLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pool_info));
+    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
 template <typename T>
 using NEPoolingLayerFixture = PoolingLayerValidationFixture<Tensor, Accessor, NEPoolingLayer, T>;
 
@@ -81,7 +125,7 @@
 }
 TEST_SUITE_END()
 
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixture<half>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFP,
                                                                                                    framework::dataset::make("DataType", DataType::F16))))
@@ -96,7 +140,7 @@
     validate(Accessor(_target), _reference, tolerance_f16);
 }
 TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 TEST_SUITE_END()
 
 template <typename T>
diff --git a/tests/validation/NEON/Remap.cpp b/tests/validation/NEON/Remap.cpp
new file mode 100644
index 0000000..6e58000
--- /dev/null
+++ b/tests/validation/NEON/Remap.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NERemap.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/RemapFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr AbsoluteTolerance<uint8_t> tolerance_value(1);
+constexpr float                      tolerance_number = 0.2f;
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(Remap)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                           framework::dataset::make("DataType", DataType::U8)),
+                                                                   framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT })),
+               shape, policy, data_type, border_mode)
+{
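+    // Create tensors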
+    Tensor src   = create_tensor<Tensor>(shape, data_type);
+    Tensor map_x = create_tensor<Tensor>(shape, DataType::F32);
+    Tensor map_y = create_tensor<Tensor>(shape, DataType::F32);
+    Tensor dst   = create_tensor<Tensor>(shape, data_type);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(map_x.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(map_y.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    NERemap remap;
+    remap.configure(&src, &map_x, &map_y, &dst, policy, border_mode);
+
+    // Validate valid region
+    const ValidRegion dst_valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding
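+    // The source read pattern needs a one-element border on the top, bottom and left, and right
+    // padding up to the next multiple of 16 elements (one extra element when already aligned).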
+    const int total_right  = ceil_to_multiple(shape[0], 16);
+    const int access_right = total_right + (((total_right - shape[0]) == 0) ? 1 : 0);
+
+    const PaddingSize read_padding(1, access_right - shape[0], 1, 1);
+    validate(src.info()->padding(), read_padding);
+
+    PaddingCalculator calculator(shape.x(), 16);
+    validate(dst.info()->padding(), calculator.required_padding());
+}
+
+template <typename T>
+using NERemapFixture = RemapValidationFixture<Tensor, Accessor, NERemap, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NERemapFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                                                             framework::dataset::make("DataType",
+                                                                                                                     DataType::U8)),
+                                                                                                     framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NERemapFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                                                           framework::dataset::make("DataType",
+                                                                                                                   DataType::U8)),
+                                                                                                   framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number);
+}
+TEST_SUITE_END() // Remap
+TEST_SUITE_END() // NEON
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/Scale.cpp b/tests/validation/NEON/Scale.cpp
index aa7dc67..b92162e 100644
--- a/tests/validation/NEON/Scale.cpp
+++ b/tests/validation/NEON/Scale.cpp
@@ -30,6 +30,7 @@
 #include "tests/PaddingCalculator.h"
 #include "tests/datasets/BorderModeDataset.h"
 #include "tests/datasets/InterpolationPolicyDataset.h"
+#include "tests/datasets/SamplingPolicyDataset.h"
 #include "tests/datasets/ShapeDatasets.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
@@ -66,10 +67,11 @@
 TEST_SUITE(NEON)
 TEST_SUITE(Scale)
 
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), ScaleDataTypes),
-                                                                           framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
-                                                                   datasets::BorderModes()),
-               shape, data_type, policy, border_mode)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), ScaleDataTypes),
+                                                                                   framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                           datasets::BorderModes()),
+                                                                   framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER })),
+               shape, data_type, policy, border_mode, sampling_policy)
 {
     std::mt19937                          generator(library->seed());
     std::uniform_real_distribution<float> distribution_float(0.25, 2);
@@ -94,7 +96,7 @@
 
     // Create and configure function
     NEScale nescale;
-    nescale.configure(&src, &dst, policy, border_mode, constant_border_value);
+    nescale.configure(&src, &dst, policy, border_mode, constant_border_value, sampling_policy);
 
     // Validate valid region
     const ValidRegion dst_valid_region = calculate_valid_region_scale(*(src.info()), shape_scaled, policy, BorderSize(1), (border_mode == BorderMode::UNDEFINED));
@@ -116,10 +118,11 @@
 
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
-                                                                                                             DataType::F32)),
-                                                                                                     framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
-                                                                                             datasets::BorderModes()))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                     DataType::F32)),
+                                                                                                             framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                                                     datasets::BorderModes()),
+                                                                                             framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER })))
 {
     //Create valid region
     TensorInfo  src_info(_shape, 1, _data_type);
@@ -128,10 +131,11 @@
     // Validate output
     validate(Accessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
                                                                                                                  DataType::F32)),
-                                                                                                         framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
-                                                                                                 datasets::BorderModes()))
+                                                                                                                 framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                                                         datasets::BorderModes()),
+                                                                                                 framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER })))
 {
     //Create valid region
     TensorInfo  src_info(_shape, 1, _data_type);
@@ -145,10 +149,11 @@
 
 TEST_SUITE(Integer)
 TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
-                                                                                                               DataType::U8)),
-                                                                                                       framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
-                                                                                               datasets::BorderModes()))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                       DataType::U8)),
+                                                                                                               framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                                                       datasets::BorderModes()),
+                                                                                               framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER })))
 {
     //Create valid region
     TensorInfo  src_info(_shape, 1, _data_type);
@@ -157,10 +162,11 @@
     // Validate output
     validate(Accessor(_target), _reference, valid_region, tolerance_u8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::U8)),
-                                                                                                           framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
-                                                                                                   datasets::BorderModes()))
+                                                                                                                   framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                                                           datasets::BorderModes()),
+                                                                                                   framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER })))
 {
     //Create valid region
     TensorInfo  src_info(_shape, 1, _data_type);
@@ -171,10 +177,11 @@
 }
 TEST_SUITE_END()
 TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
-                                                                                                               DataType::S16)),
-                                                                                                       framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
-                                                                                               datasets::BorderModes()))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                       DataType::S16)),
+                                                                                                               framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                                                       datasets::BorderModes()),
+                                                                                               framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER })))
 {
     //Create valid region
     TensorInfo  src_info(_shape, 1, _data_type);
@@ -183,10 +190,11 @@
     // Validate output
     validate(Accessor(_target), _reference, valid_region, tolerance_s16, tolerance_num_s16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::S16)),
-                                                                                                           framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
-                                                                                                   datasets::BorderModes()))
+                                                                                                                   framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                                                           datasets::BorderModes()),
+                                                                                                   framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER })))
 {
     //Create valid region
     TensorInfo  src_info(_shape, 1, _data_type);
diff --git a/tests/validation/NEON/Schaar.cpp b/tests/validation/NEON/Schaar.cpp
new file mode 100644
index 0000000..0b96eee
--- /dev/null
+++ b/tests/validation/NEON/Schaar.cpp
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEScharr3x3.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/GradientDimensionDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/ScharrFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(NEON)
+TEST_SUITE(Scharr)
+
+TEST_SUITE(W3x3)
+using NEScharr3x3Fixture = ScharrValidationFixture<Tensor, Accessor, NEScharr3x3, uint8_t, int16_t>;
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                   Format::U8)),
+               shape, border_mode, format)
+{
+    // Generate a random constant value
+    std::mt19937                           gen(library->seed());
+    std::uniform_int_distribution<uint8_t> int_dist(0, 255);
+    const uint8_t                          constant_border_value = int_dist(gen);
+
+    // Create tensors
+    Tensor src   = create_tensor<Tensor>(shape, data_type_from_format(format));
+    Tensor dst_x = create_tensor<Tensor>(shape, DataType::S16);
+    Tensor dst_y = create_tensor<Tensor>(shape, DataType::S16);
+
+    src.info()->set_format(format);
+    dst_x.info()->set_format(Format::S16);
+    dst_y.info()->set_format(Format::S16);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst_x.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst_y.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create scharr 3x3 configure function
+    NEScharr3x3 scharr;
+    scharr.configure(&src, &dst_x, &dst_y, border_mode, constant_border_value);
+
+    // Validate valid region
+    constexpr BorderSize border_size{ 1 };
+    const ValidRegion    dst_valid_region = shape_to_valid_region(shape, border_mode == BorderMode::UNDEFINED, border_size);
+
+    validate(dst_x.info()->valid_region(), dst_valid_region);
+    validate(dst_y.info()->valid_region(), dst_valid_region);
+
+    // Validate padding
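+    // Padding is checked against a kernel that writes 8 output elements per iteration and reads
+    // 16 input elements starting one element to the left to cover the 3x3 neighbourhood.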
+    PaddingCalculator calculator(shape.x(), 8);
+
+    calculator.set_border_mode(border_mode);
+    calculator.set_border_size(1);
+
+    const PaddingSize dst_padding = calculator.required_padding();
+
+    calculator.set_accessed_elements(16);
+    calculator.set_access_offset(-1);
+
+    const PaddingSize src_padding = calculator.required_padding();
+
+    validate(src.info()->padding(), src_padding);
+    validate(dst_x.info()->padding(), dst_padding);
+    validate(dst_y.info()->padding(), dst_padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEScharr3x3Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                        Format::U8)),
+                                                                                                datasets::GradientDimensions()))
+{
+    // Validate output
+    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
+    validate(Accessor(_target.first), _reference.first, valid_region_x);
+
+    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
+    validate(Accessor(_target.second), _reference.second, valid_region_y);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEScharr3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                      Format::U8)),
+                                                                                              datasets::GradientDimensions()))
+{
+    // Validate output
+    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
+    validate(Accessor(_target.first), _reference.first, valid_region_x);
+
+    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
+    validate(Accessor(_target.second), _reference.second, valid_region_y);
+}
+TEST_SUITE_END() // W3x3
+
+TEST_SUITE_END() // Scharr
+TEST_SUITE_END() // NEON
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/Sobel.cpp b/tests/validation/NEON/Sobel.cpp
index 99f9e1f..e757d1d 100644
--- a/tests/validation/NEON/Sobel.cpp
+++ b/tests/validation/NEON/Sobel.cpp
@@ -100,8 +100,48 @@
     validate(dst_y.info()->padding(), dst_padding);
 }
 
-FIXTURE_DATA_TEST_CASE(RunSmall, NESobel3x3Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                               Format::U8)))
+TEST_SUITE(X)
+FIXTURE_DATA_TEST_CASE(RunSmall, NESobel3x3Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                       Format::U8)),
+                                                                                               framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
+{
+    // Validate output
+    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
+    validate(Accessor(_target.first), _reference.first, valid_region_x);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NESobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                     Format::U8)),
+                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
+{
+    // Validate output
+    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
+    validate(Accessor(_target.first), _reference.first, valid_region_x);
+}
+TEST_SUITE_END()
+TEST_SUITE(Y)
+FIXTURE_DATA_TEST_CASE(RunSmall, NESobel3x3Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                       Format::U8)),
+                                                                                               framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
+{
+    // Validate output
+    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
+    validate(Accessor(_target.second), _reference.second, valid_region_y);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NESobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                     Format::U8)),
+                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
+{
+    // Validate output
+    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
+    validate(Accessor(_target.second), _reference.second, valid_region_y);
+}
+TEST_SUITE_END()
+TEST_SUITE(XY)
+FIXTURE_DATA_TEST_CASE(RunSmall, NESobel3x3Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                       Format::U8)),
+                                                                                               framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
 {
     // Validate output
     ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
@@ -111,8 +151,9 @@
     validate(Accessor(_target.second), _reference.second, valid_region_y);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, NESobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                             Format::U8)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NESobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                     Format::U8)),
+                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
 {
     // Validate output
     ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
@@ -122,6 +163,7 @@
     validate(Accessor(_target.second), _reference.second, valid_region_y);
 }
 TEST_SUITE_END()
+TEST_SUITE_END()
 
 TEST_SUITE(W5x5)
 using NESobel5x5Fixture = SobelValidationFixture<Tensor, Accessor, NESobel5x5, uint8_t, int16_t>;
@@ -176,9 +218,48 @@
     validate(dst_x.info()->padding(), dst_padding);
     validate(dst_y.info()->padding(), dst_padding);
 }
+TEST_SUITE(X)
+FIXTURE_DATA_TEST_CASE(RunSmall, NESobel5x5Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                       Format::U8)),
+                                                                                               framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
+{
+    // Validate output
+    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
+    validate(Accessor(_target.first), _reference.first, valid_region_x);
+}
 
-FIXTURE_DATA_TEST_CASE(RunSmall, NESobel5x5Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                               Format::U8)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NESobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                     Format::U8)),
+                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
+{
+    // Validate output
+    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
+    validate(Accessor(_target.first), _reference.first, valid_region_x);
+}
+TEST_SUITE_END()
+TEST_SUITE(Y)
+FIXTURE_DATA_TEST_CASE(RunSmall, NESobel5x5Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                       Format::U8)),
+                                                                                               framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
+{
+    // Validate output
+    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
+    validate(Accessor(_target.second), _reference.second, valid_region_y);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NESobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                     Format::U8)),
+                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
+{
+    // Validate output
+    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
+    validate(Accessor(_target.second), _reference.second, valid_region_y);
+}
+TEST_SUITE_END()
+TEST_SUITE(XY)
+FIXTURE_DATA_TEST_CASE(RunSmall, NESobel5x5Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                       Format::U8)),
+                                                                                               framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
 {
     // Validate output
     ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
@@ -188,8 +269,9 @@
     validate(Accessor(_target.second), _reference.second, valid_region_y);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, NESobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                             Format::U8)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NESobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                     Format::U8)),
+                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
 {
     // Validate output
     ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
@@ -199,6 +281,7 @@
     validate(Accessor(_target.second), _reference.second, valid_region_y);
 }
 TEST_SUITE_END()
+TEST_SUITE_END()
 
 TEST_SUITE(W7x7)
 using NESobel7x7Fixture = SobelValidationFixture<Tensor, Accessor, NESobel7x7, uint8_t, int32_t>;
@@ -253,9 +336,48 @@
     validate(dst_x.info()->padding(), dst_padding);
     validate(dst_y.info()->padding(), dst_padding);
 }
+TEST_SUITE(X)
+FIXTURE_DATA_TEST_CASE(RunSmall, NESobel7x7Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                       Format::U8)),
+                                                                                               framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
+{
+    // Validate output
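+    // A 7x7 Sobel kernel leaves a 3-pixel border undefined when BorderMode::UNDEFINED is used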
+    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
+    validate(Accessor(_target.first), _reference.first, valid_region_x);
+}
 
-FIXTURE_DATA_TEST_CASE(RunSmall, NESobel7x7Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                               Format::U8)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NESobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                     Format::U8)),
+                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
+{
+    // Validate output
+    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
+    validate(Accessor(_target.first), _reference.first, valid_region_x);
+}
+TEST_SUITE_END()
+TEST_SUITE(Y)
+FIXTURE_DATA_TEST_CASE(RunSmall, NESobel7x7Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                       Format::U8)),
+                                                                                               framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
+{
+    // Validate output
+    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
+    validate(Accessor(_target.second), _reference.second, valid_region_y);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NESobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                     Format::U8)),
+                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
+{
+    // Validate output
+    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
+    validate(Accessor(_target.second), _reference.second, valid_region_y);
+}
+TEST_SUITE_END()
+TEST_SUITE(XY)
+FIXTURE_DATA_TEST_CASE(RunSmall, NESobel7x7Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                       Format::U8)),
+                                                                                               framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
 {
     // Validate output
     ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
@@ -265,8 +387,9 @@
     validate(Accessor(_target.second), _reference.second, valid_region_y);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, NESobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                             Format::U8)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NESobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                     Format::U8)),
+                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
 {
     // Validate output
     ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
@@ -276,6 +399,7 @@
     validate(Accessor(_target.second), _reference.second, valid_region_y);
 }
 TEST_SUITE_END()
+TEST_SUITE_END()
 
 TEST_SUITE_END()
 TEST_SUITE_END()
diff --git a/tests/validation/NEON/SoftmaxLayer.cpp b/tests/validation/NEON/SoftmaxLayer.cpp
index 5ede321..0b688df 100644
--- a/tests/validation/NEON/SoftmaxLayer.cpp
+++ b/tests/validation/NEON/SoftmaxLayer.cpp
@@ -44,18 +44,18 @@
 {
 /** Tolerance for float operations */
 constexpr AbsoluteTolerance<float> tolerance_f32(0.000001f);
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 constexpr AbsoluteTolerance<float> tolerance_f16(0.0001f);
-#endif /* ARM_COMPUTE_ENABLE_FP16*/
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 /** Tolerance for fixed point operations */
 constexpr AbsoluteTolerance<int16_t> tolerance_fixed_point(2);
 
 /** CNN data types */
 const auto CNNDataTypes = framework::dataset::make("DataType",
 {
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
     DataType::F16,
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
     DataType::F32,
     DataType::QS8,
     DataType::QS16,
@@ -65,7 +65,7 @@
 TEST_SUITE(NEON)
 TEST_SUITE(SoftmaxLayer)
 
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), CNNDataTypes), shape, data_type)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::SoftmaxLayerSmallShapes(), datasets::SoftmaxLayerLargeShapes()), CNNDataTypes), shape, data_type)
 {
     // Set fixed point position data type allowed
     const int fixed_point_position = is_data_type_fixed_point(data_type) ? 3 : 0;
@@ -93,32 +93,62 @@
     validate(dst.info()->padding(), padding);
 }
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
+    framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Mismatching data types
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Mismatching shapes
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 2), // Mismatching fixed point
+                                            TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),
+                                           }),
+    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16),
+                                            TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 3),
+                                            TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),
+                                          })),
+    framework::dataset::make("Expected", { false, false, false, true })),
+    input_info, output_info, expected)
+{
+    bool is_valid = bool(NESoftmaxLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false)));
+    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
 template <typename T>
 using NESoftmaxLayerFixture = SoftmaxValidationFixture<Tensor, Accessor, NESoftmaxLayer, T>;
 
 TEST_SUITE(Float)
-#ifdef ARM_COMPUTE_ENABLE_FP16
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SoftmaxLayerSmallShapes(),
+                                                                                                                 framework::dataset::make("DataType", DataType::F16)),
+                                                                                                         framework::dataset::make("Beta", { 1.0f, 2.0f })))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_f16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(),
+                                                                                                               framework::dataset::make("DataType", DataType::F16)),
+                                                                                                       framework::dataset::make("Beta", { 1.0f, 2.0f })))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_f16);
 }
 TEST_SUITE_END()
-#endif /* ARM_COMPUTE_ENABLE_FP16 */
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SoftmaxLayerSmallShapes(),
+                                                                                                                  framework::dataset::make("DataType", DataType::F32)),
+                                                                                                          framework::dataset::make("Beta", { 1.0f, 2.0f })))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(),
+                                                                                                                framework::dataset::make("DataType", DataType::F32)),
+                                                                                                        framework::dataset::make("Beta", { 1.0f, 2.0f })))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_f32);
@@ -132,14 +162,14 @@
 TEST_SUITE(Quantized)
 TEST_SUITE(QS8)
 // Testing for fixed point position [1,6) as reciprocal limits the maximum fixed point position to 5
-FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SoftmaxLayerSmallShapes(), framework::dataset::make("DataType",
                                                                                                                      DataType::QS8)),
                                                                                                                      framework::dataset::make("FractionalBits", 1, 6)))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_fixed_point);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(), framework::dataset::make("DataType",
                                                                                                                    DataType::QS8)),
                                                                                                                    framework::dataset::make("FractionalBits", 1, 6)))
 {
@@ -150,7 +180,7 @@
 
 TEST_SUITE(QS16)
 // Testing for fixed point position [1,14) as reciprocal limits the maximum fixed point position to 14
-FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SoftmaxLayerSmallShapes(),
                                                                                                                       framework::dataset::make("DataType",
                                                                                                                               DataType::QS16)),
                                                                                                                       framework::dataset::make("FractionalBits", 1, 14)))
@@ -158,7 +188,7 @@
     // Validate output
     validate(Accessor(_target), _reference, tolerance_fixed_point);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(),
+FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(),
                                                                                                                     framework::dataset::make("DataType",
                                                                                                                             DataType::QS16)),
                                                                                                                     framework::dataset::make("FractionalBits", 1, 14)))
diff --git a/tests/validation/NEON/Transpose.cpp b/tests/validation/NEON/Transpose.cpp
new file mode 100644
index 0000000..f2ef716
--- /dev/null
+++ b/tests/validation/NEON/Transpose.cpp
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NETranspose.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/TransposeFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(NEON)
+TEST_SUITE(Transpose)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
+    framework::dataset::make("InputInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::U8),  // Input not a multiple of 8
+                                            TensorInfo(TensorShape(21U, 13U), 1, DataType::U16), // Invalid shape
+                                            TensorInfo(TensorShape(20U, 13U), 1, DataType::U32),
+                                            TensorInfo(TensorShape(20U, 13U), 1, DataType::U8),  // Wrong data type
+                                            TensorInfo(TensorShape(20U, 13U), 1, DataType::U16),
+                                            TensorInfo(TensorShape(20U, 13U), 1, DataType::U32),
+                                          }),
+    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(13U, 21U), 1, DataType::U8),
+                                            TensorInfo(TensorShape(21U, 13U), 1, DataType::U16),
+                                            TensorInfo(TensorShape(13U, 20U), 1, DataType::U32),
+                                            TensorInfo(TensorShape(31U, 20U), 1, DataType::U16),
+                                            TensorInfo(TensorShape(13U, 20U), 1, DataType::U16),
+                                            TensorInfo(TensorShape(13U, 20U), 1, DataType::U32),
+                                           })),
+    framework::dataset::make("Expected", { true, false, true, false, true, true })),
+    a_info, output_info, expected)
+{
+    // Lock tensors
+    Status status =  NETranspose::validate(&a_info.clone()->set_is_resizable(false),
+                                           &output_info.clone()->set_is_resizable(false));
+    ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), framework::dataset::make("DataType", { DataType::S8, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, DataType::F32 })),
+               shape, data_type)
+{
+    // Make rows the columns of the original shape
+    TensorShape output_shape{ shape[1], shape[0] };
+
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, data_type);
+    Tensor dst = create_tensor<Tensor>(output_shape, data_type);
+
+    // Create and Configure function
+    NETranspose trans;
+    trans.configure(&src, &dst);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(output_shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding(0, 0);
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+template <typename T>
+using NETransposeFixture = TransposeValidationFixture<Tensor, Accessor, NETranspose, T>;
+
+TEST_SUITE(U8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NETransposeFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small1DShapes(), datasets::Small2DShapes()),
+                                                                                                         framework::dataset::make("DataType", DataType::U8)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NETransposeFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(concat(datasets::Large1DShapes(), datasets::Large2DShapes()),
+                                                                                                       framework::dataset::make("DataType", DataType::U8)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(U16)
+FIXTURE_DATA_TEST_CASE(RunSmall, NETransposeFixture<uint16_t>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small1DShapes(), datasets::Small2DShapes()),
+                                                                                                          framework::dataset::make("DataType", DataType::U16)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NETransposeFixture<uint16_t>, framework::DatasetMode::NIGHTLY, combine(concat(datasets::Large1DShapes(), datasets::Large2DShapes()),
+                                                                                                        framework::dataset::make("DataType", DataType::U16)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(U32)
+FIXTURE_DATA_TEST_CASE(RunSmall, NETransposeFixture<uint32_t>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small1DShapes(), datasets::Small2DShapes()),
+                                                                                                          framework::dataset::make("DataType", DataType::U32)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NETransposeFixture<uint32_t>, framework::DatasetMode::NIGHTLY, combine(concat(datasets::Large1DShapes(), datasets::Large2DShapes()),
+                                                                                                        framework::dataset::make("DataType", DataType::U32)))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/UNIT/MemoryManager.cpp b/tests/validation/NEON/UNIT/MemoryManager.cpp
new file mode 100644
index 0000000..27b05ed
--- /dev/null
+++ b/tests/validation/NEON/UNIT/MemoryManager.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/Allocator.h"
+#include "arm_compute/runtime/BlobLifetimeManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+#include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h"
+#include "arm_compute/runtime/PoolManager.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "support/ToolchainSupport.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/Utils.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(NEON)
+TEST_SUITE(UNIT)
+TEST_SUITE(MemoryManager)
+
+TEST_CASE(BlobMemoryManagerSimpleWithinFunctionLevel, framework::DatasetMode::ALL)
+{
+    Allocator allocator{};
+    auto      lifetime_mgr = std::make_shared<BlobLifetimeManager>();
+    auto      pool_mgr     = std::make_shared<PoolManager>();
+    auto      mm           = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);
+
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(TensorShape(27U, 11U, 3U), DataType::F32, 1);
+    Tensor dst = create_tensor<Tensor>(TensorShape(27U, 11U, 3U), DataType::F32, 1);
+
+    // Create and configure function
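+    // Both normalization layers are given the same memory manager, so their internal tensors can be serviced from the single pool set up below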
+    NENormalizationLayer norm_layer_1(mm);
+    NENormalizationLayer norm_layer_2(mm);
+    norm_layer_1.configure(&src, &dst, NormalizationLayerInfo(NormType::CROSS_MAP, 3));
+    norm_layer_2.configure(&src, &dst, NormalizationLayerInfo(NormType::IN_MAP_1D, 3));
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Allocate tensors
+    src.allocator()->allocate();
+    dst.allocator()->allocate();
+
+    ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Finalize memory manager
+    mm->set_allocator(&allocator);
+    mm->set_num_pools(1);
+    mm->finalize();
+    ARM_COMPUTE_EXPECT(mm->is_finalized(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(mm->lifetime_manager()->are_all_finalized(), framework::LogLevel::ERRORS);
+
+    // Fill tensors
+    arm_compute::test::library->fill_tensor_uniform(Accessor(src), 0);
+
+    // Compute functions
+    norm_layer_1.run();
+    norm_layer_2.run();
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/UNIT/TensorAllocator.cpp b/tests/validation/NEON/UNIT/TensorAllocator.cpp
new file mode 100644
index 0000000..4732f3f
--- /dev/null
+++ b/tests/validation/NEON/UNIT/TensorAllocator.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "support/ToolchainSupport.h"
+#include "tests/Utils.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(NEON)
+TEST_SUITE(UNIT)
+TEST_SUITE(TensorAllocator)
+
+TEST_CASE(ImportMemory, framework::DatasetMode::ALL)
+{
+    // Init tensor info
+    TensorInfo info(TensorShape(24U, 16U, 3U), 1, DataType::F32);
+
+    // Allocate memory buffer
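+    // The custom deleter ensures the array is released with delete[] rather than delete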
+    std::shared_ptr<uint8_t> buf(new uint8_t[info.total_size()](), [](uint8_t *ptr)
+    {
+        delete[] ptr;
+    });
+
+    // Negative case : Import empty memory
+    Tensor t1;
+    t1.allocator()->init(info);
+    ARM_COMPUTE_EXPECT(!bool(t1.allocator()->import_memory(Memory())), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(t1.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Negative case : Import memory to a tensor that is memory managed
+    Tensor      t2;
+    MemoryGroup mg;
+    t2.allocator()->set_associated_memory_group(&mg);
+    ARM_COMPUTE_EXPECT(!bool(t2.allocator()->import_memory(Memory(buf.get()))), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(t2.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Positive case : Set raw pointer
+    Tensor t3;
+    t3.allocator()->init(info);
+    ARM_COMPUTE_EXPECT(bool(t3.allocator()->import_memory(Memory(buf.get()))), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(!t3.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(t3.buffer() == buf.get(), framework::LogLevel::ERRORS);
+    t3.allocator()->free();
+    ARM_COMPUTE_EXPECT(t3.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(t3.buffer() == nullptr, framework::LogLevel::ERRORS);
+
+    // Positive case : Set managed pointer
+    Tensor t4;
+    t4.allocator()->init(info);
+    ARM_COMPUTE_EXPECT(bool(t4.allocator()->import_memory(Memory(buf))), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(!t4.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(t4.buffer() == buf.get(), framework::LogLevel::ERRORS);
+    t4.allocator()->free();
+    ARM_COMPUTE_EXPECT(t4.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(t4.buffer() == nullptr, framework::LogLevel::ERRORS);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/WarpAffine.cpp b/tests/validation/NEON/WarpAffine.cpp
index 678549d..8c83507 100644
--- a/tests/validation/NEON/WarpAffine.cpp
+++ b/tests/validation/NEON/WarpAffine.cpp
@@ -34,9 +34,9 @@
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/CPP/Utils.h"
 #include "tests/validation/Validation.h"
 #include "tests/validation/fixtures/WarpAffineFixture.h"
+#include "tests/validation/reference/Utils.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/NEON/WarpPerspective.cpp b/tests/validation/NEON/WarpPerspective.cpp
index 45d3a0b..804c080 100644
--- a/tests/validation/NEON/WarpPerspective.cpp
+++ b/tests/validation/NEON/WarpPerspective.cpp
@@ -106,20 +106,14 @@
                                                                                                                        framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
                                                                                                                datasets::BorderModes()))
 {
-    // Create the valid mask Tensor
-    RawTensor valid_mask(_reference.shape(), _reference.data_type());
-
-    validate(Accessor(_target), _reference, valid_mask, tolerance_value, tolerance_number);
+    validate(Accessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, NEWarpPerspectiveFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
                                                                                                                      DataType::U8)),
                                                                                                                      framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
                                                                                                              datasets::BorderModes()))
 {
-    // Create the valid mask Tensor
-    RawTensor valid_mask{ _reference.shape(), _reference.data_type() };
-
-    validate(Accessor(_target), _reference, valid_mask, tolerance_value, tolerance_number);
+    validate(Accessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number);
 }
 
 TEST_SUITE_END()
diff --git a/tests/validation/UNIT/TensorInfo.cpp b/tests/validation/UNIT/TensorInfo.cpp
index 2a6c336..dd7ae6d 100644
--- a/tests/validation/UNIT/TensorInfo.cpp
+++ b/tests/validation/UNIT/TensorInfo.cpp
@@ -36,10 +36,11 @@
 namespace validation
 {
 TEST_SUITE(UNIT)
-TEST_SUITE(TensorInfoValidation)
+TEST_SUITE(TensorInfo)
 
 // *INDENT-OFF*
 // clang-format off
+/** Validates TensorInfo Autopadding */
 DATA_TEST_CASE(AutoPadding, framework::DatasetMode::ALL, zip(zip(zip(
                framework::dataset::make("TensorShape", {
                TensorShape{},
@@ -82,6 +83,51 @@
 // clang-format on
 // *INDENT-ON*
 
+/** Validates that TensorInfo is clonable */
+TEST_CASE(Clone, framework::DatasetMode::ALL)
+{
+    // Create tensor info
+    TensorInfo info(TensorShape(23U, 17U, 3U), // tensor shape
+                    1,                         // number of channels
+                    DataType::F32);            // data type
+
+    // Get clone of current tensor info
+    std::unique_ptr<ITensorInfo> info_clone = info.clone();
+    ARM_COMPUTE_EXPECT(info_clone != nullptr, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(info_clone->total_size() == info.total_size(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(info_clone->num_channels() == info.num_channels(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(info_clone->data_type() == info.data_type(), framework::LogLevel::ERRORS);
+}
+
+/** Validates that TensorInfo can chain multiple set commands */
+TEST_CASE(TensorInfoBuild, framework::DatasetMode::ALL)
+{
+    // Create tensor info
+    TensorInfo info(TensorShape(23U, 17U, 3U), // tensor shape
+                    1,                         // number of channels
+                    DataType::F32);            // data type
+
+    // Update data type and number of channels
+    info.set_data_type(DataType::S32).set_num_channels(3);
+    ARM_COMPUTE_EXPECT(info.data_type() == DataType::S32, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(info.num_channels() == 3, framework::LogLevel::ERRORS);
+
+    // Update data type channels and set fixed point position
+    info.set_data_type(DataType::QS8).set_num_channels(1).set_fixed_point_position(3);
+    ARM_COMPUTE_EXPECT(info.data_type() == DataType::QS8, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(info.num_channels() == 1, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(info.fixed_point_position() == 3, framework::LogLevel::ERRORS);
+
+    // Update data type and set quantization info
+    info.set_data_type(DataType::QASYMM8).set_quantization_info(QuantizationInfo(0.5f, 15));
+    ARM_COMPUTE_EXPECT(info.data_type() == DataType::QASYMM8, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(info.quantization_info() == QuantizationInfo(0.5f, 15), framework::LogLevel::ERRORS);
+
+    // Update tensor shape
+    info.set_tensor_shape(TensorShape(13U, 15U));
+    ARM_COMPUTE_EXPECT(info.tensor_shape() == TensorShape(13U, 15U), framework::LogLevel::ERRORS);
+}
+
-TEST_SUITE_END() // TensorInfoValidation
+TEST_SUITE_END() // TensorInfo
 TEST_SUITE_END()
 } // namespace validation
diff --git a/tests/validation/UNIT/Utils.cpp b/tests/validation/UNIT/Utils.cpp
index 71f39a0..969b213 100644
--- a/tests/validation/UNIT/Utils.cpp
+++ b/tests/validation/UNIT/Utils.cpp
@@ -60,6 +60,20 @@
     ARM_COMPUTE_EXPECT(compare_dimensions(coordinate, ref_coordinate), framework::LogLevel::ERRORS);
 }
 
+DATA_TEST_CASE(RoundFloatToZero, framework::DatasetMode::ALL, zip(framework::dataset::make("FloatIn", { 1.f, 1.2f, 1.5f, 2.5f, 2.9f, -3.f, -3.5f, -3.8f, -4.3f, -4.5f }),
+                                                                  framework::dataset::make("FloatOut", { 1.f, 1.f, 1.f, 2.f, 2.f, -3.f, -3.f, -3.f, -4.f, -4.f })),
+               value, result)
+{
+    ARM_COMPUTE_EXPECT(round(value, RoundingPolicy::TO_ZERO) == result, framework::LogLevel::ERRORS);
+}
+
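+// RoundingPolicy::TO_NEAREST_UP rounds to the nearest integer, with halves rounding away from zero (e.g. 1.5 -> 2, -4.5 -> -5)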
+DATA_TEST_CASE(RoundFloatToNearestUp, framework::DatasetMode::ALL, zip(framework::dataset::make("FloatIn", { 1.f, 1.2f, 1.5f, 2.5f, 2.9f, -3.f, -3.5f, -3.8f, -4.3f, -4.5f }),
+                                                                       framework::dataset::make("FloatOut", { 1.f, 1.f, 2.f, 3.f, 3.f, -3.f, -4.f, -4.f, -4.f, -5.f })),
+               value, result)
+{
+    ARM_COMPUTE_EXPECT(round(value, RoundingPolicy::TO_NEAREST_UP) == result, framework::LogLevel::ERRORS);
+}
+
 DATA_TEST_CASE(Coord2Index, framework::DatasetMode::ALL, zip(zip(framework::dataset::make("Shape", { TensorShape{ 1U }, TensorShape{ 2U }, TensorShape{ 2U, 3U } }),
                                                                  framework::dataset::make("Coordinates", { Coordinates{ 0 }, Coordinates{ 1 }, Coordinates{ 0, 1 } })),
                                                              framework::dataset::make("Index", { 0, 1, 2 })),
diff --git a/tests/validation/Validation.h b/tests/validation/Validation.h
index f220224..1f81d38 100644
--- a/tests/validation/Validation.h
+++ b/tests/validation/Validation.h
@@ -27,6 +27,7 @@
 #include "arm_compute/core/FixedPoint.h"
 #include "arm_compute/core/IArray.h"
 #include "arm_compute/core/Types.h"
+#include "support/ToolchainSupport.h"
 #include "tests/IAccessor.h"
 #include "tests/SimpleTensor.h"
 #include "tests/Types.h"
@@ -230,7 +231,8 @@
 
 /** Validate key points. */
 template <typename T, typename U, typename V = AbsoluteTolerance<float>>
-void validate_keypoints(T target_first, T target_last, U reference_first, U reference_last, V tolerance = AbsoluteTolerance<float>());
+void validate_keypoints(T target_first, T target_last, U reference_first, U reference_last, V tolerance = AbsoluteTolerance<float>(),
+                        float allowed_missing_percentage = 5.f, float allowed_mismatch_percentage = 5.f);
 
 template <typename T>
 struct compare_base
@@ -255,7 +257,7 @@
 
     operator bool() const
     {
-        if(!std::isfinite(this->_target) || !std::isfinite(this->_reference))
+        if(!support::cpp11::isfinite(this->_target) || !support::cpp11::isfinite(this->_reference))
         {
             return false;
         }
@@ -279,7 +281,7 @@
 
     operator bool() const
     {
-        if(!std::isfinite(this->_target) || !std::isfinite(this->_reference))
+        if(!support::cpp11::isfinite(this->_target) || !support::cpp11::isfinite(this->_reference))
         {
             return false;
         }
@@ -315,6 +317,13 @@
     validate(tensor, reference, shape_to_valid_region(tensor.shape()), tolerance_value, tolerance_number);
 }
 
+template <typename T, typename U, typename = typename std::enable_if<std::is_integral<T>::value>::type>
+void validate_wrap(const IAccessor &tensor, const SimpleTensor<T> &reference, U tolerance_value, float tolerance_number)
+{
+    // Validate with valid region covering the entire shape
+    validate_wrap(tensor, reference, shape_to_valid_region(tensor.shape()), tolerance_value, tolerance_number);
+}
+
 template <typename T, typename U>
 void validate(const IAccessor &tensor, const SimpleTensor<T> &reference, const ValidRegion &valid_region, U tolerance_value, float tolerance_number)
 {
@@ -376,6 +385,88 @@
     }
 }
 
+template <typename T, typename U, typename = typename std::enable_if<std::is_integral<T>::value>::type>
+void validate_wrap(const IAccessor &tensor, const SimpleTensor<T> &reference, const ValidRegion &valid_region, U tolerance_value, float tolerance_number)
+{
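+    // Like validate(), but mismatches that can be explained by a wrap-around of the integral type's range are also tolerated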
+    int64_t num_mismatches = 0;
+    int64_t num_elements   = 0;
+
+    ARM_COMPUTE_EXPECT_EQUAL(tensor.element_size(), reference.element_size(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT_EQUAL(tensor.data_type(), reference.data_type(), framework::LogLevel::ERRORS);
+
+    if(reference.format() != Format::UNKNOWN)
+    {
+        ARM_COMPUTE_EXPECT_EQUAL(tensor.format(), reference.format(), framework::LogLevel::ERRORS);
+    }
+
+    ARM_COMPUTE_EXPECT_EQUAL(tensor.num_channels(), reference.num_channels(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(compare_dimensions(tensor.shape(), reference.shape()), framework::LogLevel::ERRORS);
+
+    const int min_elements = std::min(tensor.num_elements(), reference.num_elements());
+    const int min_channels = std::min(tensor.num_channels(), reference.num_channels());
+
+    // Iterate over all elements within valid region, e.g. U8, S16, RGB888, ...
+    for(int element_idx = 0; element_idx < min_elements; ++element_idx)
+    {
+        const Coordinates id = index2coord(reference.shape(), element_idx);
+
+        if(is_in_valid_region(valid_region, id))
+        {
+            // Iterate over all channels within one element
+            for(int c = 0; c < min_channels; ++c)
+            {
+                const T &target_value    = reinterpret_cast<const T *>(tensor(id))[c];
+                const T &reference_value = reinterpret_cast<const T *>(reference(id))[c];
+
+                bool equal = compare<U>(target_value, reference_value, tolerance_value);
+
+                // check for wrapping
+                if(!equal)
+                {
+                    if(!support::cpp11::isfinite(target_value) || !support::cpp11::isfinite(reference_value))
+                    {
+                        equal = false;
+                    }
+                    else
+                    {
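+                        // Accept values that lie at opposite ends of the type's range (i.e. the result wrapped around)
+                        // as long as the remaining gap, max - (|target| + |reference|), is within the wrap tolerance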
+                        using limits_type = typename std::make_unsigned<T>::type;
+
+                        uint64_t max             = std::numeric_limits<limits_type>::max();
+                        uint64_t abs_sum         = std::abs(static_cast<int64_t>(target_value)) + std::abs(static_cast<int64_t>(reference_value));
+                        uint64_t wrap_difference = max - abs_sum;
+
+                        equal = wrap_difference < static_cast<uint64_t>(tolerance_value);
+                    }
+                }
+
+                if(!equal)
+                {
+                    ARM_COMPUTE_TEST_INFO("id = " << id);
+                    ARM_COMPUTE_TEST_INFO("channel = " << c);
+                    ARM_COMPUTE_TEST_INFO("target = " << std::setprecision(5) << framework::make_printable(target_value));
+                    ARM_COMPUTE_TEST_INFO("reference = " << std::setprecision(5) << framework::make_printable(reference_value));
+                    ARM_COMPUTE_TEST_INFO("wrap_tolerance = " << std::setprecision(5) << framework::make_printable(static_cast<typename U::value_type>(tolerance_value)));
+                    framework::ARM_COMPUTE_PRINT_INFO();
+
+                    ++num_mismatches;
+                }
+
+                ++num_elements;
+            }
+        }
+    }
+
+    if(num_elements > 0)
+    {
+        const int64_t absolute_tolerance_number = tolerance_number * num_elements;
+        const float   percent_mismatches        = static_cast<float>(num_mismatches) / num_elements * 100.f;
+
+        ARM_COMPUTE_TEST_INFO(num_mismatches << " values (" << std::fixed << std::setprecision(2) << percent_mismatches
+                              << "%) mismatched (maximum tolerated " << std::setprecision(2) << tolerance_number << "%)");
+        ARM_COMPUTE_EXPECT(num_mismatches <= absolute_tolerance_number, framework::LogLevel::ERRORS);
+    }
+}
+
 /** Check which keypoints from [first1, last1) are missing in [first2, last2) */
 template <typename T, typename U, typename V>
 std::pair<int64_t, int64_t> compare_keypoints(T first1, T last1, U first2, U last2, V tolerance)
@@ -393,16 +484,16 @@
         if(point == last2)
         {
             ++num_missing;
+            ARM_COMPUTE_TEST_INFO("Key point not found: " << *first1)
             ARM_COMPUTE_TEST_INFO("keypoint1 = " << *first1)
-            ARM_COMPUTE_EXPECT_FAIL("Key point not found", framework::LogLevel::DEBUG);
         }
         else if(!validate(point->tracking_status, first1->tracking_status) || !validate(point->strength, first1->strength, tolerance) || !validate(point->scale, first1->scale)
                 || !validate(point->orientation, first1->orientation) || !validate(point->error, first1->error))
         {
             ++num_mismatches;
+            ARM_COMPUTE_TEST_INFO("Mismatching keypoint")
             ARM_COMPUTE_TEST_INFO("keypoint1 = " << *first1)
             ARM_COMPUTE_TEST_INFO("keypoint2 = " << *point)
-            ARM_COMPUTE_EXPECT_FAIL("Mismatching keypoint", framework::LogLevel::DEBUG);
         }
 
         ++first1;
@@ -412,13 +503,12 @@
 }
 
 template <typename T, typename U, typename V>
-void validate_keypoints(T target_first, T target_last, U reference_first, U reference_last, V tolerance)
+void validate_keypoints(T target_first, T target_last, U reference_first, U reference_last, V tolerance,
+                        float allowed_missing_percentage, float allowed_mismatch_percentage)
 {
     const int64_t num_elements_target    = std::distance(target_first, target_last);
     const int64_t num_elements_reference = std::distance(reference_first, reference_last);
 
-    ARM_COMPUTE_EXPECT_EQUAL(num_elements_target, num_elements_reference, framework::LogLevel::ERRORS);
-
     int64_t num_missing    = 0;
     int64_t num_mismatches = 0;
 
@@ -430,10 +520,10 @@
         const float percent_mismatches = static_cast<float>(num_mismatches) / num_elements_reference * 100.f;
 
         ARM_COMPUTE_TEST_INFO(num_missing << " keypoints (" << std::fixed << std::setprecision(2) << percent_missing << "%) are missing in target");
-        ARM_COMPUTE_EXPECT_EQUAL(num_missing, 0, framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(percent_missing <= allowed_missing_percentage, framework::LogLevel::ERRORS);
 
         ARM_COMPUTE_TEST_INFO(num_mismatches << " keypoints (" << std::fixed << std::setprecision(2) << percent_mismatches << "%) mismatched");
-        ARM_COMPUTE_EXPECT_EQUAL(num_mismatches, 0, framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(percent_mismatches <= allowed_mismatch_percentage, framework::LogLevel::ERRORS);
     }
 
     if(num_elements_target > 0)
@@ -443,7 +533,7 @@
         const float percent_missing = static_cast<float>(num_missing) / num_elements_target * 100.f;
 
         ARM_COMPUTE_TEST_INFO(num_missing << " keypoints (" << std::fixed << std::setprecision(2) << percent_missing << "%) are not part of target");
-        ARM_COMPUTE_EXPECT_EQUAL(num_missing, 0, framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(percent_missing <= allowed_missing_percentage, framework::LogLevel::ERRORS);
     }
 }
 
diff --git a/tests/validation/fixtures/AbsoluteDifferenceFixture.h b/tests/validation/fixtures/AbsoluteDifferenceFixture.h
new file mode 100644
index 0000000..b725304
--- /dev/null
+++ b/tests/validation/fixtures/AbsoluteDifferenceFixture.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_ABSOLUTE_DIFFERENCE_FIXTURE
+#define ARM_COMPUTE_TEST_ABSOLUTE_DIFFERENCE_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/AbsoluteDifference.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class AbsoluteDifferenceValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type0, DataType data_type1, DataType output_data_type)
+    {
+        _target    = compute_target(shape, data_type0, data_type1, output_data_type);
+        _reference = compute_reference(shape, data_type0, data_type1, output_data_type);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        library->fill_tensor_uniform(tensor, i);
+    }
+
+    TensorType compute_target(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type)
+    {
+        // Create tensors
+        TensorType ref_src1 = create_tensor<TensorType>(shape, data_type0, 1);
+        TensorType ref_src2 = create_tensor<TensorType>(shape, data_type1, 1);
+        TensorType dst      = create_tensor<TensorType>(shape, output_data_type, 1);
+
+        // Create and configure function
+        FunctionType abs_diff;
+        abs_diff.configure(&ref_src1, &ref_src2, &dst);
+
+        ARM_COMPUTE_EXPECT(ref_src1.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(ref_src2.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        ref_src1.allocator()->allocate();
+        ref_src2.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!ref_src1.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!ref_src2.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(ref_src1), 0);
+        fill(AccessorType(ref_src2), 1);
+
+        // Compute function
+        abs_diff.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type)
+    {
+        // Create reference
+        SimpleTensor<T> ref_src1{ shape, data_type0, 1 };
+        SimpleTensor<T> ref_src2{ shape, data_type1, 1 };
+
+        // Fill reference
+        fill(ref_src1, 0);
+        fill(ref_src2, 1);
+
+        return reference::absolute_difference<T>(ref_src1, ref_src2, output_data_type);
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_ABSOLUTE_DIFFERENCE_FIXTURE */
diff --git a/tests/validation/fixtures/AccumulateFixture.h b/tests/validation/fixtures/AccumulateFixture.h
new file mode 100644
index 0000000..0910001
--- /dev/null
+++ b/tests/validation/fixtures/AccumulateFixture.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_ACCUMULATE_FIXTURE
+#define ARM_COMPUTE_TEST_ACCUMULATE_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/Accumulate.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
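+/** Base fixture for the accumulate family of functions.
+ *
+ * Derived fixtures override accum_conf() to configure the function under test and
+ * accum_ref() to compute the matching reference output.
+ */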
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
+class AccumulateBaseValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type, DataType output_data_type)
+    {
+        _target    = compute_target(shape, data_type, output_data_type);
+        _reference = compute_reference(shape, data_type, output_data_type);
+    }
+
+protected:
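+    // Fill the given tensor with values drawn uniformly from [0, max]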
+    template <typename U, typename D>
+    void fill(U &&tensor, int i, D max)
+    {
+        library->fill_tensor_uniform(tensor, i, static_cast<D>(0), max);
+    }
+
+    TensorType compute_target(const TensorShape &shape, DataType data_type, DataType output_data_type)
+    {
+        // Create tensors
+        TensorType ref_src = create_tensor<TensorType>(shape, data_type);
+        TensorType dst     = create_tensor<TensorType>(shape, output_data_type);
+
+        // Create and configure function
+        FunctionType accum;
+        accum_conf(accum, ref_src, dst);
+
+        ARM_COMPUTE_EXPECT(ref_src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        ref_src.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!ref_src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        const T1 max = std::numeric_limits<T1>::max();
+
+        // Fill tensors
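+        // The destination is filled too, since accumulation reads and updates it in place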
+        fill(AccessorType(ref_src), 0, max);
+        fill(AccessorType(dst), 1, static_cast<T2>(max));
+
+        // Compute function
+        accum.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T2> compute_reference(const TensorShape &shape, DataType data_type, DataType output_data_type)
+    {
+        // Create reference
+        SimpleTensor<T1> ref_src{ shape, data_type };
+
+        const T1 max = std::numeric_limits<T1>::max();
+
+        // Fill reference
+        fill(ref_src, 0, max);
+
+        return accum_ref(ref_src, output_data_type);
+    }
+
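+    // Hooks implemented by the derived fixtures: configure the function under test and compute its reference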
+    virtual void accum_conf(FunctionType &func, const TensorType &input, TensorType &accum) = 0;
+
+    virtual SimpleTensor<T2> accum_ref(const SimpleTensor<T1> &input, DataType output_data_type) = 0;
+
+    TensorType       _target{};
+    SimpleTensor<T2> _reference{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
+class AccumulateValidationFixture : public AccumulateBaseValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type, DataType output_data_type)
+    {
+        AccumulateBaseValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, data_type, output_data_type);
+    }
+
+    virtual void accum_conf(FunctionType &func, const TensorType &input, TensorType &accum) override
+    {
+        func.configure(&input, &accum);
+    }
+
+    virtual SimpleTensor<T2> accum_ref(const SimpleTensor<T1> &input, DataType output_data_type) override
+    {
+        return reference::accumulate<T1, T2>(input, output_data_type);
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
+class AccumulateWeightedValidationFixture : public AccumulateBaseValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type, DataType output_data_type)
+    {
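+        // Pick a random alpha in [0, 1) for the weighted accumulation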
+        std::mt19937                     gen(library->seed());
+        std::uniform_real_distribution<> float_dist(0, 1);
+
+        _alpha = float_dist(gen);
+
+        AccumulateBaseValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, data_type, output_data_type);
+    }
+
+    virtual void accum_conf(FunctionType &func, const TensorType &input, TensorType &accum) override
+    {
+        func.configure(&input, _alpha, &accum);
+    }
+
+    virtual SimpleTensor<T2> accum_ref(const SimpleTensor<T1> &input, DataType output_data_type) override
+    {
+        return reference::accumulate_weighted<T1, T2>(input, _alpha, output_data_type);
+    }
+
+    float _alpha{ 0.f };
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
+class AccumulateSquaredValidationFixture : public AccumulateBaseValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type, DataType output_data_type)
+    {
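+        // Pick a random shift in [0, 15] for the squared accumulation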
+        std::mt19937                            gen(library->seed());
+        std::uniform_int_distribution<uint32_t> int_dist(0, 15);
+
+        _shift = int_dist(gen);
+
+        AccumulateBaseValidationFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, data_type, output_data_type);
+    }
+
+    virtual void accum_conf(FunctionType &func, const TensorType &input, TensorType &accum) override
+    {
+        func.configure(&input, _shift, &accum);
+    }
+
+    virtual SimpleTensor<T2> accum_ref(const SimpleTensor<T1> &input, DataType output_data_type) override
+    {
+        return reference::accumulate_squared<T1, T2>(input, _shift, output_data_type);
+    }
+
+    uint32_t _shift{ 0U };
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_ACCUMULATE_FIXTURE */
diff --git a/tests/validation/fixtures/ActivationLayerFixture.h b/tests/validation/fixtures/ActivationLayerFixture.h
index 384e63b..e212c7b 100644
--- a/tests/validation/fixtures/ActivationLayerFixture.h
+++ b/tests/validation/fixtures/ActivationLayerFixture.h
@@ -31,8 +31,8 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/ActivationLayer.h"
 #include "tests/validation/Helpers.h"
+#include "tests/validation/reference/ActivationLayer.h"
 
 #include <random>
 
@@ -43,20 +43,21 @@
 namespace validation
 {
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class ActivationValidationFixedPointFixture : public framework::Fixture
+class ActivationValidationGenericFixture : public framework::Fixture
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, bool in_place, ActivationLayerInfo::ActivationFunction function, float alpha_beta, DataType data_type, int fractional_bits)
+    void setup(TensorShape shape, bool in_place, ActivationLayerInfo::ActivationFunction function, float alpha_beta, DataType data_type, int fractional_bits, QuantizationInfo quantization_info)
     {
-        _fractional_bits = fractional_bits;
-        _data_type       = data_type;
-        _function        = function;
+        _fractional_bits   = fractional_bits;
+        _quantization_info = quantization_info;
+        _data_type         = data_type;
+        _function          = function;
 
         ActivationLayerInfo info(function, alpha_beta, alpha_beta);
 
-        _target    = compute_target(shape, in_place, info, data_type, fractional_bits);
-        _reference = compute_reference(shape, info, data_type, fractional_bits);
+        _target    = compute_target(shape, in_place, info, data_type, fractional_bits, quantization_info);
+        _reference = compute_reference(shape, info, data_type, fractional_bits, quantization_info);
     }
 
 protected:
@@ -71,6 +72,10 @@
             std::uniform_real_distribution<> distribution(min_bound, max_bound);
             library->fill(tensor, distribution, 0);
         }
+        else if(is_data_type_quantized_asymmetric(tensor.data_type()))
+        {
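+            // QASYMM8 tensors use the library's default uniform fill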
+            library->fill_tensor_uniform(tensor, 0);
+        }
         else
         {
             int min_bound = 0;
@@ -81,11 +86,11 @@
         }
     }
 
-    TensorType compute_target(const TensorShape &shape, bool in_place, ActivationLayerInfo info, DataType data_type, int fixed_point_position = 0)
+    TensorType compute_target(const TensorShape &shape, bool in_place, ActivationLayerInfo info, DataType data_type, int fixed_point_position, QuantizationInfo quantization_info)
     {
         // Create tensors
-        TensorType src = create_tensor<TensorType>(shape, data_type, 1, fixed_point_position);
-        TensorType dst = create_tensor<TensorType>(shape, data_type, 1, fixed_point_position);
+        TensorType src = create_tensor<TensorType>(shape, data_type, 1, fixed_point_position, quantization_info);
+        TensorType dst = create_tensor<TensorType>(shape, data_type, 1, fixed_point_position, quantization_info);
 
         // Create and configure function
         FunctionType act_layer;
@@ -123,10 +128,10 @@
         }
     }
 
-    SimpleTensor<T> compute_reference(const TensorShape &shape, ActivationLayerInfo info, DataType data_type, int fixed_point_position = 0)
+    SimpleTensor<T> compute_reference(const TensorShape &shape, ActivationLayerInfo info, DataType data_type, int fixed_point_position, QuantizationInfo quantization_info)
     {
         // Create reference
-        SimpleTensor<T> src{ shape, data_type, 1, fixed_point_position };
+        SimpleTensor<T> src{ shape, data_type, 1, fixed_point_position, quantization_info };
 
         // Fill reference
         fill(src);
@@ -137,20 +142,44 @@
     TensorType                              _target{};
     SimpleTensor<T>                         _reference{};
     int                                     _fractional_bits{};
+    QuantizationInfo                        _quantization_info{};
     DataType                                _data_type{};
     ActivationLayerInfo::ActivationFunction _function{};
 };
 
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class ActivationValidationFixture : public ActivationValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>
+class ActivationValidationFixture : public ActivationValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
 {
 public:
     template <typename...>
     void setup(TensorShape shape, bool in_place, ActivationLayerInfo::ActivationFunction function, float alpha_beta, DataType data_type)
     {
-        ActivationValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, in_place, function, alpha_beta, data_type, 0);
+        ActivationValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, in_place, function, alpha_beta, data_type, 0, QuantizationInfo());
     }
 };
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class ActivationValidationFixedPointFixture : public ActivationValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, bool in_place, ActivationLayerInfo::ActivationFunction function, float alpha_beta, DataType data_type, int fractional_bits)
+    {
+        ActivationValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, in_place, function, alpha_beta, data_type, fractional_bits, QuantizationInfo());
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class ActivationValidationQuantizedFixture : public ActivationValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, bool in_place, ActivationLayerInfo::ActivationFunction function, float alpha_beta, DataType data_type, QuantizationInfo quantization_info)
+    {
+        ActivationValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, in_place, function, alpha_beta, data_type, 0, quantization_info);
+    }
+};
+
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/fixtures/ArithmeticAdditionFixture.h b/tests/validation/fixtures/ArithmeticAdditionFixture.h
index 02c3ca9..c3a51b9 100644
--- a/tests/validation/fixtures/ArithmeticAdditionFixture.h
+++ b/tests/validation/fixtures/ArithmeticAdditionFixture.h
@@ -31,8 +31,8 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/ArithmeticAddition.h"
 #include "tests/validation/Helpers.h"
+#include "tests/validation/reference/ArithmeticAddition.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/fixtures/ArithmeticSubtractionFixture.h b/tests/validation/fixtures/ArithmeticSubtractionFixture.h
index 2c683d6..ba0dd14 100644
--- a/tests/validation/fixtures/ArithmeticSubtractionFixture.h
+++ b/tests/validation/fixtures/ArithmeticSubtractionFixture.h
@@ -31,8 +31,8 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/ArithmeticSubtraction.h"
 #include "tests/validation/Helpers.h"
+#include "tests/validation/reference/ArithmeticSubtraction.h"
 
 namespace arm_compute
 {
@@ -40,7 +40,7 @@
 {
 namespace validation
 {
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2 = T1, typename T3 = T1>
 class ArithmeticSubtractionValidationFixedPointFixture : public framework::Fixture
 {
 public:
@@ -93,31 +93,31 @@
         return dst;
     }
 
-    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, int fixed_point_position)
+    SimpleTensor<T3> compute_reference(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy, int fixed_point_position)
     {
         // Create reference
-        SimpleTensor<T> ref_src1{ shape, data_type0, 1, fixed_point_position };
-        SimpleTensor<T> ref_src2{ shape, data_type1, 1, fixed_point_position };
+        SimpleTensor<T1> ref_src1{ shape, data_type0, 1, fixed_point_position };
+        SimpleTensor<T2> ref_src2{ shape, data_type1, 1, fixed_point_position };
 
         // Fill reference
         fill(ref_src1, 0);
         fill(ref_src2, 1);
 
-        return reference::arithmetic_subtraction<T>(ref_src1, ref_src2, output_data_type, convert_policy);
+        return reference::arithmetic_subtraction<T1, T2, T3>(ref_src1, ref_src2, output_data_type, convert_policy);
     }
 
-    TensorType      _target{};
-    SimpleTensor<T> _reference{};
-    int             _fractional_bits{};
+    TensorType       _target{};
+    SimpleTensor<T3> _reference{};
+    int              _fractional_bits{};
 };
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class ArithmeticSubtractionValidationFixture : public ArithmeticSubtractionValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2 = T1, typename T3 = T1>
+class ArithmeticSubtractionValidationFixture : public ArithmeticSubtractionValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2, T3>
 {
 public:
     template <typename...>
     void setup(TensorShape shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy)
     {
-        ArithmeticSubtractionValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type0, data_type1, output_data_type, convert_policy, 0);
+        ArithmeticSubtractionValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2, T3>::setup(shape, data_type0, data_type1, output_data_type, convert_policy, 0);
     }
 };
 } // namespace validation
diff --git a/tests/validation/fixtures/BatchNormalizationLayerFixture.h b/tests/validation/fixtures/BatchNormalizationLayerFixture.h
index f4772a8..298c9ca 100644
--- a/tests/validation/fixtures/BatchNormalizationLayerFixture.h
+++ b/tests/validation/fixtures/BatchNormalizationLayerFixture.h
@@ -31,8 +31,8 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/BatchNormalizationLayer.h"
 #include "tests/validation/Helpers.h"
+#include "tests/validation/reference/BatchNormalizationLayer.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/fixtures/BitwiseAndFixture.h b/tests/validation/fixtures/BitwiseAndFixture.h
index 0dfff86..c1247de 100644
--- a/tests/validation/fixtures/BitwiseAndFixture.h
+++ b/tests/validation/fixtures/BitwiseAndFixture.h
@@ -31,7 +31,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/BitwiseAnd.h"
+#include "tests/validation/reference/BitwiseAnd.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/fixtures/BitwiseNotFixture.h b/tests/validation/fixtures/BitwiseNotFixture.h
index e5bf699..f90d089 100644
--- a/tests/validation/fixtures/BitwiseNotFixture.h
+++ b/tests/validation/fixtures/BitwiseNotFixture.h
@@ -31,7 +31,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/BitwiseNot.h"
+#include "tests/validation/reference/BitwiseNot.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/fixtures/BitwiseOrFixture.h b/tests/validation/fixtures/BitwiseOrFixture.h
index d61e767..d6ec5b8 100644
--- a/tests/validation/fixtures/BitwiseOrFixture.h
+++ b/tests/validation/fixtures/BitwiseOrFixture.h
@@ -31,7 +31,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/BitwiseOr.h"
+#include "tests/validation/reference/BitwiseOr.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/fixtures/BitwiseXorFixture.h b/tests/validation/fixtures/BitwiseXorFixture.h
index 16fa8c0..8da2181 100644
--- a/tests/validation/fixtures/BitwiseXorFixture.h
+++ b/tests/validation/fixtures/BitwiseXorFixture.h
@@ -31,7 +31,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/BitwiseXor.h"
+#include "tests/validation/reference/BitwiseXor.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/fixtures/Box3x3Fixture.h b/tests/validation/fixtures/Box3x3Fixture.h
index a53987a..e851b34 100644
--- a/tests/validation/fixtures/Box3x3Fixture.h
+++ b/tests/validation/fixtures/Box3x3Fixture.h
@@ -31,7 +31,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/Box3x3.h"
+#include "tests/validation/reference/Box3x3.h"
 
 #include <random>
 
@@ -52,8 +52,9 @@
         std::uniform_int_distribution<uint8_t> distribution(0, 255);
         const uint8_t                          constant_border_value = distribution(gen);
 
-        _target    = compute_target(shape, data_type, border_mode, constant_border_value);
-        _reference = compute_reference(shape, data_type, border_mode, constant_border_value);
+        _border_mode = border_mode;
+        _target      = compute_target(shape, data_type, border_mode, constant_border_value);
+        _reference   = compute_reference(shape, data_type, border_mode, constant_border_value);
     }
 
 protected:
diff --git a/tests/validation/fixtures/ConvolutionLayerFixture.h b/tests/validation/fixtures/ConvolutionLayerFixture.h
index fe20699..48b4665 100644
--- a/tests/validation/fixtures/ConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/ConvolutionLayerFixture.h
@@ -32,9 +32,9 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/ConvolutionLayer.h"
-#include "tests/validation/CPP/Utils.h"
 #include "tests/validation/Helpers.h"
+#include "tests/validation/reference/ConvolutionLayer.h"
+#include "tests/validation/reference/Utils.h"
 
 #include <random>
 
@@ -47,17 +47,24 @@
 namespace validation
 {
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class ConvolutionValidationFixedPointFixture : public framework::Fixture
+class ConvolutionValidationGenericFixture : public framework::Fixture
 {
 public:
-    template <typename...>
-    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, bool reshape_weights, DataType data_type, int fractional_bits)
-    {
-        _fractional_bits = fractional_bits;
-        _data_type       = data_type;
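+    // Quantized asymmetric (uint8_t) runs use int32_t biases; all other data types keep T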
+    using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value, int32_t, T>::type;
 
-        _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights, data_type, fractional_bits);
-        _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, fractional_bits);
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, bool reshape_weights,
+               DataType data_type, int fractional_bits, QuantizationInfo quantization_info)
+    {
+        _data_type         = data_type;
+        _is_quantized      = is_data_type_quantized_asymmetric(data_type);
+        _bias_data_type    = _is_quantized ? DataType::S32 : data_type;
+        _fractional_bits   = fractional_bits;
+        _quantization_info = quantization_info;
+
+        _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights);
+        _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info);
     }
 
 protected:
@@ -66,6 +73,18 @@
     {
         switch(tensor.data_type())
         {
+            case DataType::QASYMM8:
+            {
+                std::uniform_int_distribution<uint8_t> distribution(0, 3);
+                library->fill(tensor, distribution, i);
+                break;
+            }
+            case DataType::S32:
+            {
+                std::uniform_int_distribution<int32_t> distribution(-100, 100);
+                library->fill(tensor, distribution, i);
+                break;
+            }
             case DataType::F16:
             case DataType::F32:
             {
@@ -79,7 +98,7 @@
     }
 
     TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info,
-                              bool reshape_weights, DataType data_type, int fixed_point_position)
+                              bool reshape_weights)
     {
         WeightsInfo weights_info(!reshape_weights, weights_shape.x(), weights_shape.y(), weights_shape[3]);
         TensorShape reshaped_weights_shape(weights_shape);
@@ -90,15 +109,16 @@
             const bool is_fully_connected_convolution = (output_shape.x() == 1 && output_shape.y() == 1);
             bool       is_optimised                   = false;
 #if defined(__arm__)
-            is_optimised = std::is_same<FunctionType, NEConvolutionLayer>::value && NEScheduler::get().cpu_info().CPU == CPUTarget::ARMV7 && data_type == DataType::F32;
+            is_optimised = std::is_same<FunctionType, NEConvolutionLayer>::value && NEScheduler::get().cpu_info().CPU == CPUTarget::ARMV7 && _data_type == DataType::F32;
 #elif defined(__aarch64__)
-            is_optimised = std::is_same<FunctionType, NEConvolutionLayer>::value && NEScheduler::get().cpu_info().CPU >= CPUTarget::ARMV8 && data_type == DataType::F32;
+            is_optimised = std::is_same<FunctionType, NEConvolutionLayer>::value && NEScheduler::get().cpu_info().CPU >= CPUTarget::ARMV8 && _data_type == DataType::F32;
 #endif /* defined(__arm__) || defined(__aarch64__) */
 
             reshaped_weights_shape.collapse(3);
 
-            if(bias_shape.total_size() > 0)
+            if(bias_shape.total_size() > 0 && !_is_quantized)
             {
+                // Add the bias to the reshaped weights matrix
                 reshaped_weights_shape.set(0, reshaped_weights_shape.x() + 1);
             }
 
@@ -110,17 +130,17 @@
             }
             else
             {
-                const int interleave_width = 16 / data_size_from_type(data_type);
+                const int interleave_width = 16 / data_size_from_type(_data_type);
                 reshaped_weights_shape.set(0, reshaped_weights_shape.x() * interleave_width);
                 reshaped_weights_shape.set(1, static_cast<unsigned int>(std::ceil(reshaped_weights_shape.y() / static_cast<float>(interleave_width))));
             }
         }
 
         // Create tensors
-        TensorType src     = create_tensor<TensorType>(input_shape, data_type, 1, fixed_point_position);
-        TensorType weights = create_tensor<TensorType>(reshaped_weights_shape, data_type, 1, fixed_point_position);
-        TensorType bias    = create_tensor<TensorType>(bias_shape, data_type, 1, fixed_point_position);
-        TensorType dst     = create_tensor<TensorType>(output_shape, data_type, 1, fixed_point_position);
+        TensorType src     = create_tensor<TensorType>(input_shape, _data_type, 1, _fractional_bits, _quantization_info);
+        TensorType weights = create_tensor<TensorType>(reshaped_weights_shape, _data_type, 1, _fractional_bits, _quantization_info);
+        TensorType bias    = create_tensor<TensorType>(bias_shape, _bias_data_type, 1, _fractional_bits, _quantization_info);
+        TensorType dst     = create_tensor<TensorType>(output_shape, _data_type, 1, _fractional_bits, _quantization_info);
 
         // Create and configure function
         FunctionType conv;
@@ -150,20 +170,28 @@
             const bool is_fully_connected_convolution = (output_shape.x() == 1 && output_shape.y() == 1);
             bool       is_optimised                   = false;
 #if defined(__arm__)
-            is_optimised = std::is_same<FunctionType, NEConvolutionLayer>::value && NEScheduler::get().cpu_info().CPU == CPUTarget::ARMV7 && data_type == DataType::F32;
+            is_optimised = std::is_same<FunctionType, NEConvolutionLayer>::value && NEScheduler::get().cpu_info().CPU == CPUTarget::ARMV7 && _data_type == DataType::F32;
 #elif defined(__aarch64__)
-            is_optimised = std::is_same<FunctionType, NEConvolutionLayer>::value && NEScheduler::get().cpu_info().CPU >= CPUTarget::ARMV8 && data_type == DataType::F32;
+            is_optimised = std::is_same<FunctionType, NEConvolutionLayer>::value && NEScheduler::get().cpu_info().CPU >= CPUTarget::ARMV8 && _data_type == DataType::F32;
 #endif /* defined(__arm__) || defined(__aarch64__) */
 
             TensorShape     tmp_weights_shape(weights_shape);
-            SimpleTensor<T> tmp_weights(tmp_weights_shape, data_type, 1, fixed_point_position);
-            SimpleTensor<T> tmp_bias(bias_shape, data_type, 1, fixed_point_position);
+            SimpleTensor<T> tmp_weights(tmp_weights_shape, _data_type, 1, _fractional_bits, _quantization_info);
 
             // Fill with original shape
             fill(tmp_weights, 1);
-            fill(tmp_bias, 2);
 
-            tmp_weights = linearise_weights(tmp_weights, &tmp_bias);
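+            // Quantized runs keep the bias in its own S32 tensor instead of folding it into the reshaped weights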
+            if(_is_quantized)
+            {
+                fill(AccessorType(bias), 2);
+                tmp_weights = linearise_weights(tmp_weights);
+            }
+            else
+            {
+                SimpleTensor<T> tmp_bias(bias_shape, _bias_data_type, 1, _fractional_bits, _quantization_info);
+                fill(tmp_bias, 2);
+                tmp_weights = linearise_weights(tmp_weights, &tmp_bias);
+            }
 
             if(!is_fully_connected_convolution && !is_optimised)
             {
@@ -192,13 +220,12 @@
         return dst;
     }
 
-    SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info,
-                                      DataType data_type, int fixed_point_position)
+    SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info)
     {
         // Create reference
-        SimpleTensor<T> src{ input_shape, data_type, 1, fixed_point_position };
-        SimpleTensor<T> weights{ weights_shape, data_type, 1, fixed_point_position };
-        SimpleTensor<T> bias{ bias_shape, data_type, 1, fixed_point_position };
+        SimpleTensor<T>     src{ input_shape, _data_type, 1, _fractional_bits, _quantization_info };
+        SimpleTensor<T>     weights{ weights_shape, _data_type, 1, _fractional_bits, _quantization_info };
+        SimpleTensor<TBias> bias{ bias_shape, _bias_data_type, 1, _fractional_bits, _quantization_info };
 
         // Fill reference
         fill(src, 0);
@@ -208,10 +235,13 @@
         return reference::convolution_layer<T>(src, weights, bias, output_shape, info);
     }
 
-    TensorType      _target{};
-    SimpleTensor<T> _reference{};
-    int             _fractional_bits{};
-    DataType        _data_type{};
+    TensorType       _target{};
+    SimpleTensor<T>  _reference{};
+    DataType         _data_type{};
+    DataType         _bias_data_type{};
+    int              _fractional_bits{};
+    QuantizationInfo _quantization_info{};
+    bool             _is_quantized = false;
 
 private:
     template <typename U>
@@ -241,7 +271,6 @@
 
             dst[dst_idx] = weights[weights_idx];
         }
-
         if(biases != nullptr)
         {
             // Fill last row with biases
@@ -260,13 +289,37 @@
 };
 
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class ConvolutionValidationFixture : public ConvolutionValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>
+class ConvolutionValidationFixture : public ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
 {
 public:
     template <typename...>
     void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, bool reshape_weights, DataType data_type)
     {
-        ConvolutionValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights, data_type, 0);
+        ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights, data_type, 0, QuantizationInfo());
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class ConvolutionValidationFixedPointFixture : public ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, bool reshape_weights, DataType data_type, int fractional_bits)
+    {
+        ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights, data_type, fractional_bits,
+                                                                                              QuantizationInfo());
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class ConvolutionValidationQuantizedFixture : public ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, bool reshape_weights, DataType data_type,
+               QuantizationInfo quantization_info)
+    {
+        ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights, data_type, 0, quantization_info);
     }
 };
 } // namespace validation
diff --git a/tests/validation/fixtures/DeconvolutionLayerFixture.h b/tests/validation/fixtures/DeconvolutionLayerFixture.h
new file mode 100644
index 0000000..e98f5e9
--- /dev/null
+++ b/tests/validation/fixtures/DeconvolutionLayerFixture.h
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/DeconvolutionLayer.h"
+
+#include <random>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DeconvolutionLayerFixtureBase : public framework::Fixture
+{
+public:
+    /** Set up the deconvolution fixture.
+     *
+     * @param[in] a The number of zeros added to the right and bottom edges of the input.
+     * @param[in] u How much to scale the X and Y axes.
+     */
+    template <typename...>
+    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info,
+               const std::pair<unsigned int, unsigned int> &a, const std::pair<unsigned int, unsigned int> &u, DataType data_type, int fractional_bits)
+    {
+        _fractional_bits = fractional_bits;
+        _data_type       = data_type;
+
+        _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, a, u, data_type, fractional_bits);
+        _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, a, data_type, fractional_bits);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        switch(tensor.data_type())
+        {
+            case DataType::F32:
+            {
+                std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+                library->fill(tensor, distribution, i);
+                break;
+            }
+            default:
+                library->fill_tensor_uniform(tensor, i);
+        }
+    }
+    /** Compute the target output.
+     *
+     * @param[in] a The number of zeros added to the right and bottom edges of the input.
+     * @param[in] u How much to scale the X and Y axes.
+     */
+    TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape,
+                              const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> &a, const std::pair<float, float> &u, DataType data_type, int fixed_point_position)
+    {
+        // Create tensors
+        TensorType src     = create_tensor<TensorType>(input_shape, data_type, 1, fixed_point_position);
+        TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, fixed_point_position);
+        TensorType bias    = create_tensor<TensorType>(bias_shape, data_type, 1, fixed_point_position);
+        TensorType dst     = create_tensor<TensorType>(output_shape, data_type, 1, fixed_point_position);
+
+        // Create and configure function
+        FunctionType conv;
+        conv.configure(&src, &weights, &bias, &dst, info, a.first, a.second, u.first, u.second);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        weights.allocator()->allocate();
+        bias.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src), 0);
+        fill(AccessorType(weights), 1);
+        fill(AccessorType(bias), 2);
+
+        // Compute deconvolution layer function
+        conv.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape,
+                                      const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> a, DataType data_type, int fixed_point_position)
+    {
+        // Create reference
+        SimpleTensor<T> src{ input_shape, data_type, 1, fixed_point_position };
+        SimpleTensor<T> weights{ weights_shape, data_type, 1, fixed_point_position };
+        SimpleTensor<T> bias{ bias_shape, data_type, 1, fixed_point_position };
+
+        // Fill reference
+        fill(src, 0);
+        fill(weights, 1);
+        fill(bias, 2);
+
+        return reference::deconvolution_layer<T>(src, weights, bias, output_shape, info, a);
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+    int             _fractional_bits{};
+    DataType        _data_type{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, unsigned int kernel_size_x, unsigned int kernel_size_y>
+class DeconvolutionValidationFixture : public DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int padx, unsigned int pady,
+               unsigned int ax, unsigned int ay, unsigned int ux, unsigned int uy, unsigned int num_kernels, DataType data_type)
+    {
+        ARM_COMPUTE_ERROR_ON_MSG(kernel_size_x != kernel_size_y, "Only square kernels supported");
+        const TensorShape   weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels);
+        const TensorShape   bias_shape(num_kernels);
+        const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL);
+        const std::pair<unsigned int, unsigned int> a(ax, ay);
+        const std::pair<float, float>               u(ux, uy);
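+        // Derive the output dimensions from the input size, kernel size, padding, added zeros (a) and upscale factors (u)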
+        auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, padx, pady, a.first, a.second, u.first, u.second,
+                                                       DimensionRoundingType::CEIL);
+        TensorShape output_shape = deconvolution_output_shape(out_dim, input_shape, weights_shape);
+        DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, a, u, data_type, 0);
+    }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/fixtures/DepthConcatenateLayerFixture.h b/tests/validation/fixtures/DepthConcatenateLayerFixture.h
index 633dba2..e7fe634 100644
--- a/tests/validation/fixtures/DepthConcatenateLayerFixture.h
+++ b/tests/validation/fixtures/DepthConcatenateLayerFixture.h
@@ -31,8 +31,8 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/DepthConcatenateLayer.h"
 #include "tests/validation/Helpers.h"
+#include "tests/validation/reference/DepthConcatenateLayer.h"
 
 #include <random>
 
@@ -43,7 +43,7 @@
 namespace validation
 {
 template <typename TensorType, typename ITensorType, typename AccessorType, typename FunctionType, typename T>
-class DepthConcatenateValidationFixture : public framework::Fixture
+class DepthConcatenateLayerValidationFixture : public framework::Fixture
 {
 public:
     template <typename...>
diff --git a/tests/validation/fixtures/DepthConvertFixture.h b/tests/validation/fixtures/DepthConvertLayerFixture.h
similarity index 85%
rename from tests/validation/fixtures/DepthConvertFixture.h
rename to tests/validation/fixtures/DepthConvertLayerFixture.h
index b132a93..4b4e959 100644
--- a/tests/validation/fixtures/DepthConvertFixture.h
+++ b/tests/validation/fixtures/DepthConvertLayerFixture.h
@@ -31,8 +31,8 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/DepthConvert.h"
 #include "tests/validation/Helpers.h"
+#include "tests/validation/reference/DepthConvertLayer.h"
 
 namespace arm_compute
 {
@@ -41,7 +41,7 @@
 namespace validation
 {
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
-class DepthConvertValidationFixedPointFixture : public framework::Fixture
+class DepthConvertLayerValidationFixedPointFixture : public framework::Fixture
 {
 public:
     template <typename...>
@@ -106,23 +106,23 @@
     int              _shift{};
 };
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
-class DepthConvertValidationFixture : public DepthConvertValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>
+class DepthConvertLayerValidationFixture : public DepthConvertLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>
 {
 public:
     template <typename...>
     void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift)
     {
-        DepthConvertValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, dt_in, dt_out, policy, shift, 0);
+        DepthConvertLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, dt_in, dt_out, policy, shift, 0);
     }
 };
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
-class DepthConvertValidationFractionalBitsFixture : public DepthConvertValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>
+class DepthConvertLayerValidationFractionalBitsFixture : public DepthConvertLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>
 {
 public:
     template <typename...>
     void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t fractional_bits)
     {
-        DepthConvertValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, dt_in, dt_out, policy, 0, fractional_bits);
+        DepthConvertLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, dt_in, dt_out, policy, 0, fractional_bits);
     }
 };
 } // namespace validation
diff --git a/tests/validation/fixtures/DepthwiseConvolutionFixture.h b/tests/validation/fixtures/DepthwiseConvolutionFixture.h
deleted file mode 100644
index 4a890f6..0000000
--- a/tests/validation/fixtures/DepthwiseConvolutionFixture.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE
-#define ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/DepthwiseConvolution.h"
-#include "tests/validation/Helpers.h"
-
-#include <random>
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class DepthwiseConvolutionValidationFixture : public framework::Fixture
-{
-public:
-    template <typename...>
-    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info)
-    {
-        _target    = compute_target(in_shape, weights_shape, out_shape, pad_stride_info);
-        _reference = compute_reference(in_shape, weights_shape, out_shape, pad_stride_info);
-    }
-
-protected:
-    template <typename U>
-    void fill(U &&tensor, int i)
-    {
-        switch(tensor.data_type())
-        {
-            case DataType::F32:
-            {
-                std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
-                library->fill(tensor, distribution, i);
-                break;
-            }
-            default:
-                library->fill_tensor_uniform(tensor, i);
-        }
-    }
-
-    TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &output_shape, PadStrideInfo &pad_stride_info)
-    {
-        // Create tensors
-        TensorType src     = create_tensor<TensorType>(input_shape, DataType::F32);
-        TensorType weights = create_tensor<TensorType>(weights_shape, DataType::F32);
-        TensorType dst     = create_tensor<TensorType>(output_shape, DataType::F32);
-
-        // Create Depthwise Convolution configure function
-        FunctionType depthwise_convolution;
-        depthwise_convolution.configure(&src, &dst, &weights, pad_stride_info);
-
-        // Allocate tensors
-        src.allocator()->allocate();
-        weights.allocator()->allocate();
-        dst.allocator()->allocate();
-
-        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!weights.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
-        // Fill tensors
-        fill(AccessorType(src), 0);
-        fill(AccessorType(weights), 1);
-
-        // Compute function
-        depthwise_convolution.run();
-
-        return dst;
-    }
-
-    SimpleTensor<T> compute_reference(const TensorShape &in_shape, const TensorShape &weights_shape, const TensorShape &out_shape, const PadStrideInfo &pad_stride_info)
-    {
-        SimpleTensor<T> src(in_shape, DataType::F32);
-        SimpleTensor<T> weights(weights_shape, DataType::F32);
-
-        fill(src, 0);
-        fill(weights, 1);
-
-        return reference::depthwise_convolution(src, weights, out_shape, pad_stride_info);
-    }
-
-    TensorType      _target{};
-    SimpleTensor<T> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE */
diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
new file mode 100644
index 0000000..4426215
--- /dev/null
+++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE
+#define ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/DepthwiseConvolutionLayer.h"
+
+#include <random>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DepthwiseConvolutionLayerValidationGenericFixture : public framework::Fixture
+{
+public:
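+    // uint8_t (QASYMM8) runs use int32_t biases; other data types keep T for the bias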
+    using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value, int32_t, T>::type;
+
+public:
+    template <typename...>
+    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info)
+    {
+        _quantization_info = quantization_info;
+        _data_type         = data_type;
+
+        const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
+
+        _target    = compute_target(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info);
+        _reference = compute_reference(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        switch(tensor.data_type())
+        {
+            case DataType::QASYMM8:
+            {
+                std::uniform_int_distribution<uint8_t> distribution(0, 10);
+                library->fill(tensor, distribution, i);
+                break;
+            }
+            case DataType::F32:
+            {
+                std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+                library->fill(tensor, distribution, i);
+                break;
+            }
+            case DataType::S32:
+            {
+                std::uniform_int_distribution<int32_t> distribution(-1000, 1000);
+                library->fill(tensor, distribution, i);
+                break;
+            }
+            default:
+                library->fill_tensor_uniform(tensor, i);
+        }
+    }
+
+    TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &output_shape, PadStrideInfo &pad_stride_info,
+                              const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info)
+    {
+        // Create tensors
+        TensorType src     = create_tensor<TensorType>(input_shape, data_type, 1, 0, quantization_info);
+        TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, 0, quantization_info);
+        TensorType biases  = create_tensor<TensorType>(biases_shape, bias_data_type, 1, 0, quantization_info);
+        TensorType dst     = create_tensor<TensorType>(output_shape, data_type, 1, 0, quantization_info);
+
+        // Create and configure Depthwise Convolution function
+        FunctionType dwc;
+        dwc.configure(&src, &weights, &biases, &dst, pad_stride_info);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(biases.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        weights.allocator()->allocate();
+        biases.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!biases.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src), 0);
+        fill(AccessorType(weights), 1);
+        fill(AccessorType(biases), 2);
+
+        // Compute function
+        dwc.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &in_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &out_shape, const PadStrideInfo &pad_stride_info,
+                                      const DataType data_type, const DataType bias_data_type, QuantizationInfo quantization_info)
+    {
+        SimpleTensor<T>     src{ in_shape, data_type, 1, 0, quantization_info };
+        SimpleTensor<T>     weights{ weights_shape, data_type, 1, 0, quantization_info };
+        SimpleTensor<TBias> biases{ biases_shape, bias_data_type, 1, 0, quantization_info };
+
+        fill(src, 0);
+        fill(weights, 1);
+        fill(biases, 2);
+
+        return reference::depthwise_convolution(src, weights, biases, out_shape, pad_stride_info);
+    }
+
+    TensorType       _target{};
+    SimpleTensor<T>  _reference{};
+    DataType         _data_type{};
+    QuantizationInfo _quantization_info{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DepthwiseConvolutionLayerValidationFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type)
+    {
+        DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info,
+                                                                                                            data_type, QuantizationInfo());
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DepthwiseConvolutionLayerValidationQuantizedFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info)
+    {
+        DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info,
+                                                                                                            data_type, quantization_info);
+    }
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE */
diff --git a/tests/validation/fixtures/DepthwiseSeparableConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseSeparableConvolutionLayerFixture.h
index e8f6854..8bce60c 100644
--- a/tests/validation/fixtures/DepthwiseSeparableConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DepthwiseSeparableConvolutionLayerFixture.h
@@ -31,8 +31,8 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/DepthwiseSeparableConvolutionLayer.h"
 #include "tests/validation/Helpers.h"
+#include "tests/validation/reference/DepthwiseSeparableConvolutionLayer.h"
 
 #include <random>
 
@@ -47,22 +47,25 @@
 {
 public:
     template <typename...>
-    void setup(TensorShape in_shape, TensorShape depthwise_weights_shape, TensorShape depthwise_out_shape, TensorShape pointwise_weights_shape, TensorShape biases_shape, TensorShape output_shape,
+    void setup(TensorShape in_shape, TensorShape depthwise_weights_shape, TensorShape depthwise_biases_shape, TensorShape depthwise_out_shape, TensorShape pointwise_weights_shape,
+               TensorShape pointwise_biases_shape, TensorShape output_shape,
                PadStrideInfo pad_stride_depthwise_info, PadStrideInfo pad_stride_pointwise_info)
     {
-        _target    = compute_target(in_shape, depthwise_weights_shape, depthwise_out_shape, pointwise_weights_shape, biases_shape, output_shape, pad_stride_depthwise_info, pad_stride_pointwise_info);
-        _reference = compute_reference(in_shape, depthwise_weights_shape, depthwise_out_shape, pointwise_weights_shape, biases_shape, output_shape, pad_stride_depthwise_info, pad_stride_pointwise_info);
+        _target = compute_target(in_shape, depthwise_weights_shape, depthwise_biases_shape, depthwise_out_shape, pointwise_weights_shape, pointwise_biases_shape, output_shape, pad_stride_depthwise_info,
+                                 pad_stride_pointwise_info);
+        _reference = compute_reference(in_shape, depthwise_weights_shape, depthwise_biases_shape, depthwise_out_shape, pointwise_weights_shape, pointwise_biases_shape, output_shape, pad_stride_depthwise_info,
+                                       pad_stride_pointwise_info);
     }
 
 protected:
     template <typename U>
-    void fill(U &&tensor, int i)
+    void fill(U &&tensor, int i, bool zero_fill = false)
     {
         switch(tensor.data_type())
         {
             case DataType::F32:
             {
-                std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
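+                // When zero_fill is true both bounds collapse to 0.f, so the tensor (the depthwise biases) is filled with zeros.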
+                std::uniform_real_distribution<> distribution((zero_fill) ? 0.f : -1.0f, (zero_fill) ? 0.f : 1.0f);
                 library->fill(tensor, distribution, i);
                 break;
             }
@@ -71,42 +74,47 @@
         }
     }
 
-    TensorType compute_target(const TensorShape &input_shape, const TensorShape &depthwise_weights_shape, const TensorShape &depthwise_out_shape, const TensorShape &pointwise_weights_shape,
-                              const TensorShape &biases_shape,
-                              const TensorShape &output_shape, const PadStrideInfo &pad_stride_depthwise_info, const PadStrideInfo &pad_stride_pointwise_info)
+    TensorType compute_target(const TensorShape &input_shape, const TensorShape &depthwise_weights_shape, const TensorShape &depthwise_biases_shape, const TensorShape &depthwise_out_shape,
+                              const TensorShape &pointwise_weights_shape, const TensorShape &pointwise_biases_shape, const TensorShape &output_shape,
+                              const PadStrideInfo &pad_stride_depthwise_info, const PadStrideInfo &pad_stride_pointwise_info)
     {
         // Create tensors
         TensorType src               = create_tensor<TensorType>(input_shape, DataType::F32);
         TensorType depthwise_weights = create_tensor<TensorType>(depthwise_weights_shape, DataType::F32);
+        TensorType depthwise_biases  = create_tensor<TensorType>(depthwise_biases_shape, DataType::F32);
         TensorType depthwise_out     = create_tensor<TensorType>(depthwise_out_shape, DataType::F32);
         TensorType pointwise_weights = create_tensor<TensorType>(pointwise_weights_shape, DataType::F32);
-        TensorType biases            = create_tensor<TensorType>(biases_shape, DataType::F32);
+        TensorType pointwise_biases  = create_tensor<TensorType>(pointwise_biases_shape, DataType::F32);
         TensorType dst               = create_tensor<TensorType>(output_shape, DataType::F32);
 
         // Create Depthwise Separable Convolution Layer configure function
-        CLDepthwiseSeparableConvolutionLayer depthwise_separable_convolution_layer;
-        depthwise_separable_convolution_layer.configure(&src, &depthwise_weights, &depthwise_out, &pointwise_weights, &biases, &dst, pad_stride_depthwise_info, pad_stride_pointwise_info);
+        FunctionType depthwise_separable_convolution_layer;
+        depthwise_separable_convolution_layer.configure(&src, &depthwise_weights, &depthwise_biases, &depthwise_out, &pointwise_weights, &pointwise_biases, &dst, pad_stride_depthwise_info,
+                                                        pad_stride_pointwise_info);
 
         // Allocate tensors
         src.allocator()->allocate();
         depthwise_weights.allocator()->allocate();
+        depthwise_biases.allocator()->allocate();
         depthwise_out.allocator()->allocate();
         pointwise_weights.allocator()->allocate();
-        biases.allocator()->allocate();
+        pointwise_biases.allocator()->allocate();
         dst.allocator()->allocate();
 
         ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(!depthwise_weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!depthwise_biases.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(!depthwise_out.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(!pointwise_weights.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!biases.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!pointwise_biases.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
         // Fill tensors
         fill(AccessorType(src), 0);
         fill(AccessorType(depthwise_weights), 1);
-        fill(AccessorType(pointwise_weights), 2);
-        fill(AccessorType(biases), 3);
+        fill(AccessorType(depthwise_biases), 2, true);
+        fill(AccessorType(pointwise_weights), 3);
+        fill(AccessorType(pointwise_biases), 4);
 
         // Compute function
         depthwise_separable_convolution_layer.run();
@@ -114,20 +122,27 @@
         return dst;
     }
 
-    SimpleTensor<T> compute_reference(const TensorShape &in_shape, const TensorShape &depthwise_weights_shape, const TensorShape &depthwise_out_shape, const TensorShape &pointwise_weights_shape,
-                                      const TensorShape &biases_shape, const TensorShape &dst_shape, const PadStrideInfo &pad_stride_depthwise_info, const PadStrideInfo &pad_stride_pointwise_info)
+    SimpleTensor<T> compute_reference(const TensorShape &in_shape, const TensorShape &depthwise_weights_shape, const TensorShape &depthwise_biases_shape, const TensorShape &depthwise_out_shape,
+                                      const TensorShape &pointwise_weights_shape, const TensorShape &pointwise_biases_shape, const TensorShape &dst_shape,
+                                      const PadStrideInfo &pad_stride_depthwise_info, const PadStrideInfo &pad_stride_pointwise_info)
     {
         SimpleTensor<T> src(in_shape, DataType::F32);
         SimpleTensor<T> depthwise_weights(depthwise_weights_shape, DataType::F32);
+        SimpleTensor<T> depthwise_biases(depthwise_biases_shape, DataType::F32);
         SimpleTensor<T> pointwise_weights(pointwise_weights_shape, DataType::F32);
-        SimpleTensor<T> biases(biases_shape, DataType::F32);
+        SimpleTensor<T> pointwise_biases(pointwise_biases_shape, DataType::F32);
 
         fill(src, 0);
         fill(depthwise_weights, 1);
-        fill(pointwise_weights, 2);
-        fill(biases, 3);
+        fill(depthwise_biases, 2, true);
+        fill(pointwise_weights, 3);
+        fill(pointwise_biases, 4);
 
-        return reference::depthwise_separable_convolution_layer(src, depthwise_weights, depthwise_out_shape, pointwise_weights, biases, dst_shape, pad_stride_depthwise_info, pad_stride_pointwise_info);
+        return reference::depthwise_separable_convolution_layer(src,
+                                                                depthwise_weights, depthwise_biases, depthwise_out_shape,
+                                                                pointwise_weights, pointwise_biases,
+                                                                dst_shape,
+                                                                pad_stride_depthwise_info, pad_stride_pointwise_info);
     }
 
     TensorType      _target{};
diff --git a/tests/validation/fixtures/DequantizationLayerFixture.h b/tests/validation/fixtures/DequantizationLayerFixture.h
index 28d43cf..5855820 100644
--- a/tests/validation/fixtures/DequantizationLayerFixture.h
+++ b/tests/validation/fixtures/DequantizationLayerFixture.h
@@ -32,7 +32,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/DequantizationLayer.h"
+#include "tests/validation/reference/DequantizationLayer.h"
 
 #include <random>
 
diff --git a/tests/validation/fixtures/L2NormalizeFixture.h b/tests/validation/fixtures/DilateFixture.h
similarity index 69%
copy from tests/validation/fixtures/L2NormalizeFixture.h
copy to tests/validation/fixtures/DilateFixture.h
index e611393..aa531a1 100644
--- a/tests/validation/fixtures/L2NormalizeFixture.h
+++ b/tests/validation/fixtures/DilateFixture.h
@@ -21,18 +21,19 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE
-#define ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE
+#ifndef ARM_COMPUTE_TEST_DILATE_FIXTURE
+#define ARM_COMPUTE_TEST_DILATE_FIXTURE
 
 #include "arm_compute/core/TensorShape.h"
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/Tensor.h"
 #include "tests/AssetsLibrary.h"
 #include "tests/Globals.h"
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/L2Normalize.h"
+#include "tests/validation/reference/Dilate.h"
+
+#include <random>
 
 namespace arm_compute
 {
@@ -41,14 +42,19 @@
 namespace validation
 {
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class L2NormalizeValidationFixture : public framework::Fixture
+class DilateValidationFixture : public framework::Fixture
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type, unsigned int axis, float epsilon)
+    void setup(TensorShape shape, DataType data_type, BorderMode border_mode)
     {
-        _target    = compute_target(shape, data_type, axis, epsilon);
-        _reference = compute_reference(shape, data_type, axis, epsilon);
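+        // Generate a random constant border value to exercise border handling (relevant for BorderMode::CONSTANT).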
+        std::mt19937                           gen(library->seed());
+        std::uniform_int_distribution<uint8_t> distribution(0, 255);
+        const uint8_t                          constant_border_value = distribution(gen);
+
+        _border_mode = border_mode;
+        _target      = compute_target(shape, data_type, border_mode, constant_border_value);
+        _reference   = compute_reference(shape, data_type, border_mode, constant_border_value);
     }
 
 protected:
@@ -58,15 +64,15 @@
         library->fill_tensor_uniform(tensor, 0);
     }
 
-    TensorType compute_target(const TensorShape &shape, DataType data_type, unsigned int axis, float epsilon)
+    TensorType compute_target(const TensorShape &shape, DataType data_type, BorderMode border_mode, uint8_t constant_border_value)
     {
         // Create tensors
         TensorType src = create_tensor<TensorType>(shape, data_type);
         TensorType dst = create_tensor<TensorType>(shape, data_type);
 
         // Create and configure function
-        FunctionType l2_norm_func;
-        l2_norm_func.configure(&src, &dst, axis, epsilon);
+        FunctionType dilate;
+        dilate.configure(&src, &dst, border_mode, constant_border_value);
 
         ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -82,26 +88,30 @@
         fill(AccessorType(src));
 
         // Compute function
-        l2_norm_func.run();
+        dilate.run();
 
         return dst;
     }
 
-    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, unsigned int axis, float epsilon)
+    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, BorderMode border_mode, uint8_t constant_border_value)
     {
+        ARM_COMPUTE_ERROR_ON(data_type != DataType::U8);
+
         // Create reference
         SimpleTensor<T> src{ shape, data_type };
 
         // Fill reference
         fill(src);
 
-        return reference::l2_normalize<T>(src, axis, epsilon);
+        // Compute reference
+        return reference::dilate<T>(src, border_mode, constant_border_value);
     }
 
+    BorderMode      _border_mode{};
     TensorType      _target{};
     SimpleTensor<T> _reference{};
 };
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE */
+#endif /* ARM_COMPUTE_TEST_DILATE_FIXTURE */
diff --git a/tests/validation/fixtures/DirectConvolutionLayerFixture.h b/tests/validation/fixtures/DirectConvolutionLayerFixture.h
index a709157..d63a5bc 100644
--- a/tests/validation/fixtures/DirectConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DirectConvolutionLayerFixture.h
@@ -28,9 +28,9 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/ConvolutionLayer.h"
 #include "tests/validation/Helpers.h"
 #include "tests/validation/fixtures/ConvolutionLayerFixture.h"
+#include "tests/validation/reference/ConvolutionLayer.h"
 
 #include <random>
 
@@ -41,22 +41,42 @@
 namespace validation
 {
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class DirectConvolutionValidationFixedPointFixture : public framework::Fixture
+class DirectConvolutionValidationGenericFixture : public framework::Fixture
 {
 public:
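+    // TBias resolves to int32_t when T is uint8_t (the quantized asymmetric path); for all other types the bias element type is T.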
+    using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value, int32_t, T>::type;
+
+public:
     template <typename...>
-    void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, DataType data_type, int fractional_bits)
+    void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels,
+               DataType data_type, int fractional_bits, QuantizationInfo quantization_info)
     {
-        _fractional_bits = fractional_bits;
-        _data_type       = data_type;
+        _fractional_bits   = fractional_bits;
+        _quantization_info = quantization_info;
+        _data_type         = data_type;
 
         const TensorShape   weights_shape(kernel_size, kernel_size, input_shape.z(), num_kernels);
         const TensorShape   bias_shape(num_kernels);
         const PadStrideInfo info(stride_x, stride_y, pad_x, pad_y, DimensionRoundingType::FLOOR);
-        const TensorShape   output_shape = get_output_shape(input_shape, weights_shape, info);
+        const TensorShape   output_shape   = get_output_shape(input_shape, weights_shape, info);
+        const DataType      bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
 
-        _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, fractional_bits);
-        _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, fractional_bits);
+        _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, fractional_bits, quantization_info);
+        _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, fractional_bits, quantization_info);
+    }
+
+    template <typename...>
+    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info,
+               DataType data_type, int fractional_bits, QuantizationInfo quantization_info)
+    {
+        _fractional_bits   = fractional_bits;
+        _quantization_info = quantization_info;
+        _data_type         = data_type;
+
+        const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
+
+        _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, fractional_bits, quantization_info);
+        _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, fractional_bits, quantization_info);
     }
 
 protected:
@@ -65,6 +85,12 @@
     {
         switch(tensor.data_type())
         {
+            case DataType::QASYMM8:
+            {
+                std::uniform_int_distribution<uint8_t> distribution(0, 50);
+                library->fill(tensor, distribution, i);
+                break;
+            }
             case DataType::F16:
             case DataType::F32:
             {
@@ -72,19 +98,25 @@
                 library->fill(tensor, distribution, i);
                 break;
             }
+            case DataType::S32:
+            {
+                std::uniform_int_distribution<int32_t> distribution(-5, 5);
+                library->fill(tensor, distribution, i);
+                break;
+            }
             default:
                 library->fill_tensor_uniform(tensor, i);
         }
     }
 
     TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info,
-                              DataType data_type, int fixed_point_position)
+                              DataType data_type, DataType bias_data_type, int fixed_point_position, QuantizationInfo quantization_info)
     {
         // Create tensors
-        TensorType src     = create_tensor<TensorType>(input_shape, data_type, 1, fixed_point_position);
-        TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, fixed_point_position);
-        TensorType bias    = create_tensor<TensorType>(bias_shape, data_type, 1, fixed_point_position);
-        TensorType dst     = create_tensor<TensorType>(output_shape, data_type, 1, fixed_point_position);
+        TensorType src     = create_tensor<TensorType>(input_shape, data_type, 1, fixed_point_position, quantization_info);
+        TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, fixed_point_position, quantization_info);
+        TensorType bias    = create_tensor<TensorType>(bias_shape, bias_data_type, 1, fixed_point_position, quantization_info);
+        TensorType dst     = create_tensor<TensorType>(output_shape, data_type, 1, fixed_point_position, quantization_info);
 
         // Create and configure function
         FunctionType conv;
@@ -118,12 +150,12 @@
     }
 
     SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info,
-                                      DataType data_type, int fixed_point_position)
+                                      DataType data_type, DataType bias_data_type, int fixed_point_position, QuantizationInfo quantization_info)
     {
         // Create reference
-        SimpleTensor<T> src{ input_shape, data_type, 1, fixed_point_position };
-        SimpleTensor<T> weights{ weights_shape, data_type, 1, fixed_point_position };
-        SimpleTensor<T> bias{ bias_shape, data_type, 1, fixed_point_position };
+        SimpleTensor<T>     src{ input_shape, data_type, 1, fixed_point_position, quantization_info };
+        SimpleTensor<T>     weights{ weights_shape, data_type, 1, fixed_point_position, quantization_info };
+        SimpleTensor<TBias> bias{ bias_shape, bias_data_type, 1, fixed_point_position, quantization_info };
 
         // Fill reference
         fill(src, 0);
@@ -133,10 +165,11 @@
         return reference::convolution_layer<T>(src, weights, bias, output_shape, info);
     }
 
-    TensorType      _target{};
-    SimpleTensor<T> _reference{};
-    int             _fractional_bits{};
-    DataType        _data_type{};
+    TensorType       _target{};
+    SimpleTensor<T>  _reference{};
+    int              _fractional_bits{};
+    QuantizationInfo _quantization_info{};
+    DataType         _data_type{};
 
 private:
     TensorShape get_output_shape(TensorShape in_shape, TensorShape kernel_shape, const PadStrideInfo &info)
@@ -155,15 +188,63 @@
 };
 
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class DirectConvolutionValidationFixture : public DirectConvolutionValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>
+class DirectConvolutionValidationFixture : public DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
 {
 public:
     template <typename...>
     void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, DataType data_type)
     {
-        DirectConvolutionValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type, 0);
+        DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type, 0, QuantizationInfo());
     }
 };
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DirectConvolutionValidationFixedPointFixture : public DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, DataType data_type, int fractional_bits)
+    {
+        DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type, fractional_bits,
+                                                                                                    QuantizationInfo());
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DirectConvolutionValidationQuantizedFixture : public DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y, unsigned int kernel_size, unsigned int num_kernels, DataType data_type, QuantizationInfo quantization_info)
+    {
+        DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type, 0, quantization_info);
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DirectConvolutionValidationWithTensorShapesQuantizedFixture : public DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info,
+               DataType data_type, QuantizationInfo quantization_info)
+    {
+        DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, 0, quantization_info);
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DirectConvolutionValidationWithTensorShapesFixture : public DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info,
+               DataType data_type)
+    {
+        DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, 0, QuantizationInfo());
+    }
+};
+
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/fixtures/L2NormalizeFixture.h b/tests/validation/fixtures/DropoutLayerFixture.h
similarity index 70%
copy from tests/validation/fixtures/L2NormalizeFixture.h
copy to tests/validation/fixtures/DropoutLayerFixture.h
index e611393..3a077db 100644
--- a/tests/validation/fixtures/L2NormalizeFixture.h
+++ b/tests/validation/fixtures/DropoutLayerFixture.h
@@ -21,8 +21,9 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE
-#define ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE
+
+#ifndef ARM_COMPUTE_TEST_DROPOUT_LAYER_FIXTURE
+#define ARM_COMPUTE_TEST_DROPOUT_LAYER_FIXTURE
 
 #include "arm_compute/core/TensorShape.h"
 #include "arm_compute/core/Types.h"
@@ -32,7 +33,8 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/L2Normalize.h"
+
+#include <random>
 
 namespace arm_compute
 {
@@ -41,14 +43,13 @@
 namespace validation
 {
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class L2NormalizeValidationFixture : public framework::Fixture
+class DropoutLayerValidationFixture : public framework::Fixture
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type, unsigned int axis, float epsilon)
+    void setup(TensorShape shape, float ratio, bool forward, DataType data_type)
     {
-        _target    = compute_target(shape, data_type, axis, epsilon);
-        _reference = compute_reference(shape, data_type, axis, epsilon);
+        _target = compute_target(shape, ratio, forward, data_type);
     }
 
 protected:
@@ -58,50 +59,48 @@
         library->fill_tensor_uniform(tensor, 0);
     }
 
-    TensorType compute_target(const TensorShape &shape, DataType data_type, unsigned int axis, float epsilon)
+    TensorType compute_target(const TensorShape &shape, float ratio, bool forward, DataType data_type)
     {
         // Create tensors
-        TensorType src = create_tensor<TensorType>(shape, data_type);
-        TensorType dst = create_tensor<TensorType>(shape, data_type);
+        TensorType src  = create_tensor<TensorType>(shape, data_type, 1);
+        TensorType mask = create_tensor<TensorType>(shape, data_type, 1);
+        TensorType dst  = create_tensor<TensorType>(shape, data_type, 1);
 
         // Create and configure function
-        FunctionType l2_norm_func;
-        l2_norm_func.configure(&src, &dst, axis, epsilon);
+        FunctionType dropout_layer;
+        dropout_layer.configure(&src, &mask, &dst, ratio, forward);
 
         ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
         // Allocate tensors
         src.allocator()->allocate();
+        mask.allocator()->allocate();
         dst.allocator()->allocate();
 
         ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!mask.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
         // Fill tensors
         fill(AccessorType(src));
 
         // Compute function
-        l2_norm_func.run();
+        dropout_layer.run();
 
         return dst;
     }
 
-    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, unsigned int axis, float epsilon)
+    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type)
     {
-        // Create reference
-        SimpleTensor<T> src{ shape, data_type };
-
-        // Fill reference
-        fill(src);
-
-        return reference::l2_normalize<T>(src, axis, epsilon);
+        // Dropout applies a random mask, so no bit-exact reference is computed; setup() only keeps the target result.
+        // Return an unfilled tensor of the expected shape so the non-void function is well-formed.
+        return SimpleTensor<T>(shape, data_type);
     }
 
     TensorType      _target{};
     SimpleTensor<T> _reference{};
+    int             _fractional_bits{};
 };
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE */
+
+#endif /* ARM_COMPUTE_TEST_DROPOUT_LAYER_FIXTURE */
diff --git a/tests/validation/fixtures/L2NormalizeFixture.h b/tests/validation/fixtures/ErodeFixture.h
similarity index 70%
copy from tests/validation/fixtures/L2NormalizeFixture.h
copy to tests/validation/fixtures/ErodeFixture.h
index e611393..d37cac0 100644
--- a/tests/validation/fixtures/L2NormalizeFixture.h
+++ b/tests/validation/fixtures/ErodeFixture.h
@@ -21,18 +21,19 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE
-#define ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE
+#ifndef ARM_COMPUTE_TEST_ERODE_FIXTURE
+#define ARM_COMPUTE_TEST_ERODE_FIXTURE
 
 #include "arm_compute/core/TensorShape.h"
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/Tensor.h"
 #include "tests/AssetsLibrary.h"
 #include "tests/Globals.h"
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/L2Normalize.h"
+#include "tests/validation/reference/Erode.h"
+
+#include <random>
 
 namespace arm_compute
 {
@@ -41,14 +42,19 @@
 namespace validation
 {
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class L2NormalizeValidationFixture : public framework::Fixture
+class ErodeValidationFixture : public framework::Fixture
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type, unsigned int axis, float epsilon)
+    void setup(TensorShape shape, DataType data_type, BorderMode border_mode)
     {
-        _target    = compute_target(shape, data_type, axis, epsilon);
-        _reference = compute_reference(shape, data_type, axis, epsilon);
+        std::mt19937                           gen(library->seed());
+        std::uniform_int_distribution<uint8_t> distribution(0, 255);
+        const uint8_t                          constant_border_value = distribution(gen);
+
+        _border_mode = border_mode;
+        _target      = compute_target(shape, data_type, border_mode, constant_border_value);
+        _reference   = compute_reference(shape, data_type, border_mode, constant_border_value);
     }
 
 protected:
@@ -58,15 +64,15 @@
         library->fill_tensor_uniform(tensor, 0);
     }
 
-    TensorType compute_target(const TensorShape &shape, DataType data_type, unsigned int axis, float epsilon)
+    TensorType compute_target(const TensorShape &shape, DataType data_type, BorderMode border_mode, uint8_t constant_border_value)
     {
         // Create tensors
         TensorType src = create_tensor<TensorType>(shape, data_type);
         TensorType dst = create_tensor<TensorType>(shape, data_type);
 
         // Create and configure function
-        FunctionType l2_norm_func;
-        l2_norm_func.configure(&src, &dst, axis, epsilon);
+        FunctionType erode;
+        erode.configure(&src, &dst, border_mode, constant_border_value);
 
         ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -82,26 +88,30 @@
         fill(AccessorType(src));
 
         // Compute function
-        l2_norm_func.run();
+        erode.run();
 
         return dst;
     }
 
-    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, unsigned int axis, float epsilon)
+    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, BorderMode border_mode, uint8_t constant_border_value)
     {
+        ARM_COMPUTE_ERROR_ON(data_type != DataType::U8);
+
         // Create reference
         SimpleTensor<T> src{ shape, data_type };
 
         // Fill reference
         fill(src);
 
-        return reference::l2_normalize<T>(src, axis, epsilon);
+        // Compute reference
+        return reference::erode<T>(src, border_mode, constant_border_value);
     }
 
+    BorderMode      _border_mode{};
     TensorType      _target{};
     SimpleTensor<T> _reference{};
 };
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE */
+#endif /* ARM_COMPUTE_TEST_ERODE_FIXTURE */
diff --git a/tests/validation/fixtures/FixedPointFixture.h b/tests/validation/fixtures/FixedPointFixture.h
index 2980e2f..3f3bb6e 100644
--- a/tests/validation/fixtures/FixedPointFixture.h
+++ b/tests/validation/fixtures/FixedPointFixture.h
@@ -31,8 +31,8 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/FixedPoint.h"
 #include "tests/validation/Helpers.h"
+#include "tests/validation/reference/FixedPoint.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/fixtures/FixedPointPixelWiseMultiplicationFixture.h b/tests/validation/fixtures/FixedPointPixelWiseMultiplicationFixture.h
new file mode 100644
index 0000000..73386df
--- /dev/null
+++ b/tests/validation/fixtures/FixedPointPixelWiseMultiplicationFixture.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_FIXED_POINT_PIXEL_WISE_MULTIPLICATION_FIXTURE
+#define ARM_COMPUTE_TEST_FIXED_POINT_PIXEL_WISE_MULTIPLICATION_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/FixedPointPixelWiseMultiplication.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class FixedPointPixelWiseMultiplicationValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape    shape,
+               DataType       dt_in1,
+               DataType       dt_in2,
+               float          scale,
+               ConvertPolicy  convert_policy,
+               RoundingPolicy rounding_policy,
+               int            fixed_point_position)
+    {
+        _target    = compute_target(shape, dt_in1, dt_in2, scale, convert_policy, rounding_policy, fixed_point_position);
+        _reference = compute_reference(shape, dt_in1, dt_in2, scale, convert_policy, fixed_point_position);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, unsigned int seed_offset)
+    {
+        library->fill_tensor_uniform(tensor, seed_offset);
+    }
+
+    TensorType compute_target(const TensorShape &shape, DataType dt_in1, DataType dt_in2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, int fixed_point_position)
+    {
+        // Create tensors
+        TensorType src1 = create_tensor<TensorType>(shape, dt_in1, 1, fixed_point_position);
+        TensorType src2 = create_tensor<TensorType>(shape, dt_in2, 1, fixed_point_position);
+        TensorType dst  = create_tensor<TensorType>(shape, dt_in2, 1, fixed_point_position);
+
+        // Create and configure function
+        FunctionType multiply;
+        multiply.configure(&src1, &src2, &dst, scale, convert_policy, rounding_policy);
+
+        ARM_COMPUTE_EXPECT(src1.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(src2.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src1.allocator()->allocate();
+        src2.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src1.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!src2.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src1), 0);
+        fill(AccessorType(src2), 1);
+
+        // Compute function
+        multiply.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType dt_in1, DataType dt_in2, float scale, ConvertPolicy convert_policy, int fixed_point_position)
+    {
+        // Create reference
+        SimpleTensor<T> src1{ shape, dt_in1, 1, fixed_point_position };
+        SimpleTensor<T> src2{ shape, dt_in2, 1, fixed_point_position };
+
+        // Fill reference
+        fill(src1, 0);
+        fill(src2, 1);
+
+        return reference::fixed_point_pixel_wise_multiplication<T>(src1, src2, scale, convert_policy);
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_FIXED_POINT_PIXEL_WISE_MULTIPLICATION_FIXTURE */
diff --git a/tests/validation/fixtures/FlattenLayerFixture.h b/tests/validation/fixtures/FlattenLayerFixture.h
index faf53c7..436138b 100644
--- a/tests/validation/fixtures/FlattenLayerFixture.h
+++ b/tests/validation/fixtures/FlattenLayerFixture.h
@@ -33,7 +33,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/FlattenLayer.h"
+#include "tests/validation/reference/FlattenLayer.h"
 
 #include <random>
 
diff --git a/tests/validation/fixtures/FloorFixture.h b/tests/validation/fixtures/FloorFixture.h
index 3f94841..246105e 100644
--- a/tests/validation/fixtures/FloorFixture.h
+++ b/tests/validation/fixtures/FloorFixture.h
@@ -32,7 +32,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/Floor.h"
+#include "tests/validation/reference/Floor.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/fixtures/FullyConnectedLayerFixture.h b/tests/validation/fixtures/FullyConnectedLayerFixture.h
index b19c40d..f23fc20 100644
--- a/tests/validation/fixtures/FullyConnectedLayerFixture.h
+++ b/tests/validation/fixtures/FullyConnectedLayerFixture.h
@@ -33,9 +33,9 @@
 #include "tests/RawTensor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/FullyConnectedLayer.h"
-#include "tests/validation/CPP/Utils.h"
 #include "tests/validation/Helpers.h"
+#include "tests/validation/reference/FullyConnectedLayer.h"
+#include "tests/validation/reference/Utils.h"
 
 #include <random>
 
@@ -46,27 +46,43 @@
 namespace validation
 {
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool run_interleave>
-class FullyConnectedLayerValidationFixedPointFixture : public framework::Fixture
+class FullyConnectedLayerValidationGenericFixture : public framework::Fixture
 {
 public:
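+    // As in the direct convolution fixture, QASYMM8 inputs pair with int32_t biases; other data types keep T (see _bias_data_type in setup()).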
+    using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value, int32_t, T>::type;
+
+public:
     template <typename...>
-    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights, DataType data_type, int fractional_bits)
+    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights,
+               DataType data_type, int fractional_bits, QuantizationInfo quantization_info)
     {
         ARM_COMPUTE_UNUSED(weights_shape);
         ARM_COMPUTE_UNUSED(bias_shape);
 
-        _fractional_bits = fractional_bits;
-        _data_type       = data_type;
+        _data_type         = data_type;
+        _bias_data_type    = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
+        _fractional_bits   = fractional_bits;
+        _quantization_info = quantization_info;
 
-        _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, transpose_weights, reshape_weights, data_type, fractional_bits);
-        _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, transpose_weights, reshape_weights, data_type, fractional_bits);
+        _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, transpose_weights, reshape_weights);
+        _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, transpose_weights, reshape_weights);
     }
 
 protected:
     template <typename U>
     void fill(U &&tensor, int i)
     {
-        if(is_data_type_float(_data_type))
+        if(is_data_type_quantized_asymmetric(_data_type))
+        {
+            std::uniform_int_distribution<uint8_t> distribution(0, 30);
+            library->fill(tensor, distribution, i);
+        }
+        else if(_data_type == DataType::S32)
+        {
+            std::uniform_int_distribution<int32_t> distribution(-50, 50);
+            library->fill(tensor, distribution, i);
+        }
+        else if(is_data_type_float(_data_type))
         {
             std::uniform_real_distribution<> distribution(0.5f, 1.f);
             library->fill(tensor, distribution, i);
@@ -78,7 +94,7 @@
     }
 
     TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, bool transpose_weights,
-                              bool reshape_weights, DataType data_type, int fixed_point_position)
+                              bool reshape_weights)
     {
         TensorShape reshaped_weights_shape(weights_shape);
 
@@ -102,7 +118,7 @@
             // Transpose 1xW for batched version
             if(!reshape_weights && output_shape.y() > 1 && run_interleave)
             {
-                const int   transpose_width = 16 / data_size_from_type(data_type);
+                const int   transpose_width = 16 / data_size_from_type(_data_type);
                 const float shape_x         = reshaped_weights_shape.x();
                 reshaped_weights_shape.set(0, reshaped_weights_shape.y() * transpose_width);
                 reshaped_weights_shape.set(1, static_cast<unsigned int>(std::ceil(shape_x / transpose_width)));
@@ -110,10 +126,10 @@
         }
 
         // Create tensors
-        TensorType src     = create_tensor<TensorType>(input_shape, data_type, 1, fixed_point_position);
-        TensorType weights = create_tensor<TensorType>(reshaped_weights_shape, data_type, 1, fixed_point_position);
-        TensorType bias    = create_tensor<TensorType>(bias_shape, data_type, 1, fixed_point_position);
-        TensorType dst     = create_tensor<TensorType>(output_shape, data_type, 1, fixed_point_position);
+        TensorType src     = create_tensor<TensorType>(input_shape, _data_type, 1, _fractional_bits, _quantization_info);
+        TensorType weights = create_tensor<TensorType>(reshaped_weights_shape, _data_type, 1, _fractional_bits, _quantization_info);
+        TensorType bias    = create_tensor<TensorType>(bias_shape, _bias_data_type, 1, _fractional_bits, _quantization_info);
+        TensorType dst     = create_tensor<TensorType>(output_shape, _data_type, 1, _fractional_bits, _quantization_info);
 
         // Create and configure function.
         FunctionType fc;
@@ -142,7 +158,7 @@
         if(!reshape_weights || !transpose_weights)
         {
             TensorShape tmp_shape(weights_shape);
-            RawTensor   tmp(tmp_shape, data_type, 1, fixed_point_position);
+            RawTensor   tmp(tmp_shape, _data_type, 1, _fractional_bits);
 
             // Fill with original shape
             fill(tmp, 1);
@@ -180,12 +196,12 @@
     }
 
     SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, bool transpose_weights,
-                                      bool reshape_weights, DataType data_type, int fixed_point_position = 0)
+                                      bool reshape_weights)
     {
         // Create reference
-        SimpleTensor<T> src{ input_shape, data_type, 1, fixed_point_position };
-        SimpleTensor<T> weights{ weights_shape, data_type, 1, fixed_point_position };
-        SimpleTensor<T> bias{ bias_shape, data_type, 1, fixed_point_position };
+        SimpleTensor<T>     src{ input_shape, _data_type, 1, _fractional_bits, _quantization_info };
+        SimpleTensor<T>     weights{ weights_shape, _data_type, 1, _fractional_bits, _quantization_info };
+        SimpleTensor<TBias> bias{ bias_shape, _bias_data_type, 1, _fractional_bits, _quantization_info };
 
         // Fill reference
         fill(src, 0);
@@ -195,22 +211,51 @@
         return reference::fully_connected_layer<T>(src, weights, bias, output_shape);
     }
 
-    TensorType      _target{};
-    SimpleTensor<T> _reference{};
-    int             _fractional_bits{};
-    DataType        _data_type{};
+    TensorType       _target{};
+    SimpleTensor<T>  _reference{};
+    DataType         _data_type{};
+    DataType         _bias_data_type{};
+    int              _fractional_bits{};
+    QuantizationInfo _quantization_info{};
 };
 
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool run_interleave>
-class FullyConnectedLayerValidationFixture : public FullyConnectedLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T, run_interleave>
+class FullyConnectedLayerValidationFixture : public FullyConnectedLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, run_interleave>
 {
 public:
     template <typename...>
     void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights, DataType data_type)
     {
-        FullyConnectedLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T, run_interleave>::setup(input_shape, weights_shape, bias_shape, output_shape, transpose_weights,
-                                                                                                                         reshape_weights, data_type,
-                                                                                                                         0);
+        FullyConnectedLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, run_interleave>::setup(input_shape, weights_shape, bias_shape, output_shape, transpose_weights,
+                                                                                                                      reshape_weights, data_type,
+                                                                                                                      0, QuantizationInfo());
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool run_interleave>
+class FullyConnectedLayerValidationFixedPointFixture : public FullyConnectedLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, run_interleave>
+{
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights, DataType data_type, int fractional_bits)
+    {
+        FullyConnectedLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, run_interleave>::setup(input_shape, weights_shape, bias_shape, output_shape, transpose_weights,
+                                                                                                                      reshape_weights, data_type,
+                                                                                                                      fractional_bits, QuantizationInfo());
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool run_interleave>
+class FullyConnectedLayerValidationQuantizedFixture : public FullyConnectedLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, run_interleave>
+{
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights, DataType data_type,
+               QuantizationInfo quantization_info)
+    {
+        FullyConnectedLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T, run_interleave>::setup(input_shape, weights_shape, bias_shape, output_shape, transpose_weights,
+                                                                                                                      reshape_weights, data_type,
+                                                                                                                      0, quantization_info);
     }
 };
 } // namespace validation
diff --git a/tests/validation/fixtures/GEMMFixture.h b/tests/validation/fixtures/GEMMFixture.h
index 923a29c..e807ad8 100644
--- a/tests/validation/fixtures/GEMMFixture.h
+++ b/tests/validation/fixtures/GEMMFixture.h
@@ -31,8 +31,8 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/GEMM.h"
 #include "tests/validation/Helpers.h"
+#include "tests/validation/reference/GEMM.h"
 
 #include <random>
 
diff --git a/tests/validation/fixtures/GEMMInterleave4x4Fixture.h b/tests/validation/fixtures/GEMMInterleave4x4Fixture.h
new file mode 100644
index 0000000..1f0a742
--- /dev/null
+++ b/tests/validation/fixtures/GEMMInterleave4x4Fixture.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_GEMM_INTERLEAVE_4X4_FIXTURE
+#define ARM_COMPUTE_TEST_GEMM_INTERLEAVE_4X4_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/GEMMInterleave4x4.h"
+
+#include <random>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class GEMMInterleave4x4ValidationFixedPointFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(size_t x, size_t y, DataType data_type, int fractional_bits)
+    {
+        _fractional_bits = fractional_bits;
+        _data_type       = data_type;
+        const TensorShape shape_a(x, y);
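+        // Interleaved output shape: width = x * 4 and height = ceil(y / 4), i.e. four rows of the input packed per output row.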
+        const TensorShape shape_b(static_cast<size_t>(x * 4.f), static_cast<size_t>(std::ceil(y / 4.f)));
+        _target    = compute_target(shape_a, shape_b, data_type, fractional_bits);
+        _reference = compute_reference(shape_a, shape_b, data_type, fractional_bits);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        switch(tensor.data_type())
+        {
+            case DataType::F16:
+            case DataType::F32:
+            {
+                std::uniform_real_distribution<> distribution(-1.f, 1.f);
+                library->fill(tensor, distribution, i);
+                break;
+            }
+            default:
+                library->fill_tensor_uniform(tensor, i);
+                break;
+        }
+    }
+
+    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, DataType data_type, int fixed_point_position)
+    {
+        // Create tensors
+        TensorType a = create_tensor<TensorType>(shape_a, data_type, 1, fixed_point_position);
+        TensorType b = create_tensor<TensorType>(shape_b, data_type, 1, fixed_point_position);
+
+        // Create and configure function
+        FunctionType f;
+        f.configure(&a, &b);
+
+        ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        a.allocator()->allocate();
+        b.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(a), 0);
+        fill(AccessorType(b), 0);
+
+        // Compute GEMM interleave function
+        f.run();
+        return b;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, DataType data_type, int fixed_point_position)
+    {
+        // Create reference
+        SimpleTensor<T> a{ shape_a, data_type, 1, fixed_point_position };
+        SimpleTensor<T> b{ shape_b, data_type, 1, fixed_point_position };
+
+        // Fill reference
+        fill(a, 0);
+        fill(b, 0);
+
+        return reference::gemm_interleave_4x4<T>(a, b);
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+    int             _fractional_bits{};
+    DataType        _data_type{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class GEMMInterleave4x4ValidationFixture : public GEMMInterleave4x4ValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(size_t x, size_t y, DataType data_type)
+    {
+        GEMMInterleave4x4ValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(x, y, data_type, 0);
+    }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_GEMM_INTERLEAVE_4X4_FIXTURE */
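For context, a minimal sketch of how a fixture like this is typically instantiated on the test side; the function, accessor and dataset names below are illustrative assumptions, not part of this patch:

// Hypothetical test-side usage (CL backend shown; names are assumptions, not part of the patch).
template <typename T>
using CLGEMMInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<CLTensor, CLAccessor, CLGEMMInterleave4x4, T>;

FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMInterleave4x4Fixture<float>, framework::DatasetMode::PRECOMMIT,
                       combine(combine(framework::dataset::make("x", 8, 12), framework::dataset::make("y", 8, 12)),
                               framework::dataset::make("DataType", DataType::F32)))
{
    // The fixture exposes the computed tensor and the SimpleTensor reference as members
    validate(CLAccessor(_target), _reference);
}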
diff --git a/tests/validation/fixtures/GEMMInterleaveBlockedFixture.h b/tests/validation/fixtures/GEMMInterleaveBlockedFixture.h
new file mode 100644
index 0000000..488324d
--- /dev/null
+++ b/tests/validation/fixtures/GEMMInterleaveBlockedFixture.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_GEMM_INTERLEAVE_BLOCKED_FIXTURE
+#define ARM_COMPUTE_TEST_GEMM_INTERLEAVE_BLOCKED_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/GEMMInterleaveBlocked.h"
+
+#include <random>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, bool Transposed = false>
+class GEMMInterleaveBlockedValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(size_t x, size_t y, int int_by, int block)
+    {
+        const float       interleave_by_f32 = int_by;
+        const TensorShape shape_a(x, y);
+        const TensorShape shape_b(static_cast<size_t>(x * interleave_by_f32), static_cast<size_t>(std::ceil(y / interleave_by_f32)));
+        _target    = compute_target(shape_a, shape_b, int_by, block);
+        _reference = compute_reference(shape_a, shape_b, int_by, block);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        ARM_COMPUTE_ERROR_ON(tensor.data_type() != DataType::U8);
+        std::uniform_int_distribution<> distribution(0, 255);
+        library->fill(tensor, distribution, i);
+    }
+
+    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, int int_by, int block)
+    {
+        // Create tensors
+        TensorType a = create_tensor<TensorType>(shape_a, DataType::U8, 1);
+        TensorType b = create_tensor<TensorType>(shape_b, DataType::U8, 1);
+
+        // Create and configure function
+        FunctionType f;
+        f.configure(&a, &b, int_by, block, Transposed);
+
+        ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        a.allocator()->allocate();
+        b.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(a), 0);
+
+        // Compute GEMM interleave blocked function
+        f.run();
+        return b;
+    }
+
+    SimpleTensor<uint8_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, int int_by, int block)
+    {
+        // Create reference
+        SimpleTensor<uint8_t> a{ shape_a, DataType::U8, 1 };
+        SimpleTensor<uint8_t> b{ shape_b, DataType::U8, 1 };
+
+        // Fill reference
+        fill(a, 0);
+        return reference::gemm_interleave_blocked<uint8_t>(a, b, int_by, block, Transposed);
+    }
+
+    TensorType            _target{};
+    SimpleTensor<uint8_t> _reference{};
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_GEMM_INTERLEAVE_BLOCKED_FIXTURE */
diff --git a/tests/validation/fixtures/GEMMLowpAssemblyFixture.h b/tests/validation/fixtures/GEMMLowpAssemblyFixture.h
new file mode 100644
index 0000000..ff33c9d
--- /dev/null
+++ b/tests/validation/fixtures/GEMMLowpAssemblyFixture.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_GEMMLOWP_ASSEMBLY_FIXTURE
+#define ARM_COMPUTE_TEST_GEMMLOWP_ASSEMBLY_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/GEMMLowp.h"
+
+#include <random>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T2>
+class GEMMLowpAssemblyFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(size_t m, size_t n, size_t k)
+    {
+        const TensorShape shape_a(k, m);
+        const TensorShape shape_b(n, k);
+        const TensorShape shape_c(n, m);
+        _target    = compute_target(shape_a, shape_b, shape_c);
+        _reference = compute_reference(shape_a, shape_b, shape_c);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i, int lo, int hi)
+    {
+        std::uniform_int_distribution<> distribution(lo, hi);
+        library->fill(tensor, distribution, i);
+    }
+
+    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c)
+    {
+        DataType dt_in = std::is_same<T2, int8_t>::value ? DataType::S8 : DataType::U8;
+
+        // Create tensors
+        TensorType a = create_tensor<TensorType>(shape_a, dt_in, 1);
+        TensorType b = create_tensor<TensorType>(shape_b, dt_in, 1);
+        TensorType c = create_tensor<TensorType>(shape_c, DataType::S32, 1);
+
+        // Create and configure function
+        FunctionType gemmlowp;
+        gemmlowp.configure(&a, &b, &c);
+
+        ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        a.allocator()->allocate();
+        b.allocator()->allocate();
+        c.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!c.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        if(dt_in == DataType::S8)
+        {
+            fill(AccessorType(a), 0, -128, 127);
+            fill(AccessorType(b), 1, -128, 127);
+        }
+        else
+        {
+            fill(AccessorType(a), 0, 0, 128);
+            fill(AccessorType(b), 1, 0, 128);
+        }
+        fill(AccessorType(c), 2, 0, 0);
+
+        // Compute GEMM function
+        gemmlowp.run();
+        return c;
+    }
+
+    SimpleTensor<int32_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c)
+    {
+        DataType dt = std::is_same<T2, int8_t>::value ? DataType::S8 : DataType::U8;
+
+        // Create reference
+        SimpleTensor<T2> a{ shape_a, dt, 1 };
+        SimpleTensor<T2> b{ shape_b, dt, 1 };
+
+        // Fill reference
+        if(dt == DataType::S8)
+        {
+            fill(a, 0, -128, 127);
+            fill(b, 1, -128, 127);
+        }
+        else
+        {
+            fill(a, 0, 0, 128);
+            fill(b, 1, 0, 128);
+        }
+
+        return reference::gemmlowp<int32_t, T2>(a, b);
+    }
+
+    TensorType            _target{};
+    SimpleTensor<int32_t> _reference{};
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_GEMMLOWP_ASSEMBLY_FIXTURE */
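For reference, the semantics this fixture validates reduce to a plain matrix multiply with 32-bit accumulation and no offsets. The loop below is an illustrative sketch over assumed row-major raw buffers a, b, c with dimensions M, N, K; it is not the library implementation:

// Sketch: C (MxN, s32) = A (MxK, s8/u8) * B (KxN, s8/u8), accumulated in int32, no offsets applied.
for(size_t m = 0; m < M; ++m)
{
    for(size_t n = 0; n < N; ++n)
    {
        int32_t acc = 0;
        for(size_t k = 0; k < K; ++k)
        {
            acc += static_cast<int32_t>(a[m * K + k]) * static_cast<int32_t>(b[k * N + n]);
        }
        c[m * N + n] = acc;
    }
}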
diff --git a/tests/validation/fixtures/GEMMLowpFixture.h b/tests/validation/fixtures/GEMMLowpFixture.h
index c972469..06d6be3 100644
--- a/tests/validation/fixtures/GEMMLowpFixture.h
+++ b/tests/validation/fixtures/GEMMLowpFixture.h
@@ -31,8 +31,8 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/GEMMLowp.h"
 #include "tests/validation/Helpers.h"
+#include "tests/validation/reference/GEMMLowp.h"
 
 #include <random>
 
@@ -43,39 +43,39 @@
 namespace validation
 {
 template <typename TensorType, typename AccessorType, typename FunctionType>
-class GEMMLowpOffsetValidationFixture : public framework::Fixture
+class GEMMLowpMatrixMultiplyCoreValidationFixture : public framework::Fixture
 {
 public:
     template <typename...>
-    void setup(size_t m, size_t n, size_t k, int32_t a_offset, int32_t b_offset, int32_t c_offset, int32_t c_mult_int, int32_t out_shift)
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, int32_t a_offset, int32_t b_offset)
     {
-        const TensorShape shape_a(k, m);
-        const TensorShape shape_b(n, k);
-        const TensorShape shape_c(n, m);
-        _target    = compute_target(shape_a, shape_b, shape_c, a_offset, b_offset, c_offset, c_mult_int, out_shift);
-        _reference = compute_reference(shape_a, shape_b, shape_c, a_offset, b_offset, c_offset, c_mult_int, out_shift);
+        _target    = compute_target(shape_a, shape_b, shape_c, a_offset, b_offset);
+        _reference = compute_reference(shape_a, shape_b, shape_c, a_offset, b_offset);
     }
 
 protected:
     template <typename U>
     void fill(U &&tensor, int i)
     {
-        ARM_COMPUTE_ERROR_ON(tensor.data_type() != DataType::U8);
-        std::uniform_int_distribution<> distribution(0, 3);
+        // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
+        std::uniform_int_distribution<> distribution(1, 254);
         library->fill(tensor, distribution, i);
     }
 
     TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c,
-                              int32_t a_offset, int32_t b_offset, int32_t c_offset, int32_t c_mult_int, int32_t out_shift)
+                              int32_t a_offset, int32_t b_offset)
     {
         // Create tensors
-        TensorType a = create_tensor<TensorType>(shape_a, DataType::U8, 1);
-        TensorType b = create_tensor<TensorType>(shape_b, DataType::U8, 1);
-        TensorType c = create_tensor<TensorType>(shape_c, DataType::U8, 1);
+        TensorType a = create_tensor<TensorType>(shape_a, DataType::QASYMM8, 1);
+        TensorType b = create_tensor<TensorType>(shape_b, DataType::QASYMM8, 1);
+        TensorType c = create_tensor<TensorType>(shape_c, DataType::S32, 1);
+
+        a.info()->set_quantization_info(QuantizationInfo(1.0f / 255, a_offset));
+        b.info()->set_quantization_info(QuantizationInfo(1.0f / 255, b_offset));
 
         // Create and configure function
         FunctionType gemmlowp;
-        gemmlowp.configure(&a, &b, &c, a_offset, b_offset, c_offset, c_mult_int, out_shift);
+        gemmlowp.configure(&a, &b, &c);
 
         ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -93,34 +93,213 @@
         // Fill tensors
         fill(AccessorType(a), 0);
         fill(AccessorType(b), 1);
-        fill(AccessorType(c), 2);
 
         // Compute GEMM function
         gemmlowp.run();
         return c;
     }
 
-    SimpleTensor<uint8_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c,
-                                            int32_t a_offset, int32_t b_offset, int32_t c_offset, int32_t c_mult_int, int32_t out_shift)
+    SimpleTensor<int32_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c,
+                                            int32_t a_offset, int32_t b_offset)
     {
         // Create reference
-        SimpleTensor<uint8_t> a{ shape_a, DataType::U8, 1 };
-        SimpleTensor<uint8_t> b{ shape_b, DataType::U8, 1 };
-        SimpleTensor<uint8_t> c{ shape_c, DataType::U8, 1 };
+        SimpleTensor<uint8_t> a{ shape_a, DataType::QASYMM8, 1 };
+        SimpleTensor<uint8_t> b{ shape_b, DataType::QASYMM8, 1 };
 
         // Fill reference
         fill(a, 0);
         fill(b, 1);
-        fill(c, 2);
 
-        return reference::gemmlowp<uint8_t>(a, b, c, a_offset, b_offset, c_offset, c_mult_int, out_shift);
+        return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(a, b, a_offset, b_offset);
+    }
+
+    TensorType            _target{};
+    SimpleTensor<int32_t> _reference{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType>
+class GEMMLowpQuantizeDownInt32ToUint8ScaleValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
+    {
+        _target    = compute_target(shape, result_offset, result_mult_int, result_shift, min, max, add_bias);
+        _reference = compute_reference(shape, result_offset, result_mult_int, result_shift, min, max, add_bias);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        std::uniform_int_distribution<> distribution(-6000, 6000);
+        library->fill(tensor, distribution, i);
+    }
+
+    TensorType compute_target(const TensorShape &shape, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
+    {
+        TensorShape shape_bias(shape[0]);
+
+        // Create tensors
+        TensorType a = create_tensor<TensorType>(shape, DataType::S32, 1);
+        TensorType b = create_tensor<TensorType>(shape_bias, DataType::S32, 1);
+        TensorType c = create_tensor<TensorType>(shape, DataType::QASYMM8, 1);
+
+        // Create and configure function
+        FunctionType output_stage;
+        output_stage.configure(&a, add_bias ? &b : nullptr, &c, result_offset, result_mult_int, result_shift, min, max);
+
+        ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        a.allocator()->allocate();
+        c.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!c.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensor
+        fill(AccessorType(a), 0);
+
+        if(add_bias)
+        {
+            ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+            // Allocate bias tensor
+            b.allocator()->allocate();
+
+            ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+            // Fill tensor
+            fill(AccessorType(b), 1);
+        }
+
+        // Compute output stage function
+        output_stage.run();
+        return c;
+    }
+
+    SimpleTensor<uint8_t> compute_reference(const TensorShape &shape, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
+    {
+        // Create reference
+        TensorShape shape_bias(shape[0]);
+
+        SimpleTensor<int32_t> a{ shape, DataType::S32, 1 };
+        SimpleTensor<int32_t> b{ shape_bias, DataType::S32, 1 };
+
+        // Fill reference
+        fill(a, 0);
+
+        if(add_bias)
+        {
+            // Fill bias
+            fill(b, 1);
+
+            return reference::gemmlowp_quantize_down_int32_to_uint8_scale<int32_t>(a, b, result_offset, result_mult_int, result_shift, min, max);
+        }
+        else
+        {
+            return reference::gemmlowp_quantize_down_int32_to_uint8_scale<int32_t>(a, result_offset, result_mult_int, result_shift, min, max);
+        }
     }
 
     TensorType            _target{};
     SimpleTensor<uint8_t> _reference{};
 };
 
+template <typename TensorType, typename AccessorType, typename FunctionType>
+class GEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max, bool add_bias)
+    {
+        _target    = compute_target(shape, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max, add_bias);
+        _reference = compute_reference(shape, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max, add_bias);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        std::uniform_int_distribution<> distribution(-6000, 6000);
+        library->fill(tensor, distribution, i);
+    }
+
+    TensorType compute_target(const TensorShape &shape, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max, bool add_bias)
+    {
+        TensorShape shape_bias(shape[0]);
+
+        // Create tensors
+        TensorType a = create_tensor<TensorType>(shape, DataType::S32, 1);
+        TensorType b = create_tensor<TensorType>(shape_bias, DataType::S32, 1);
+        TensorType c = create_tensor<TensorType>(shape, DataType::QASYMM8, 1);
+
+        // Create and configure function
+        FunctionType output_stage;
+        output_stage.configure(&a, add_bias ? &b : nullptr, &c, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
+
+        ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        a.allocator()->allocate();
+        c.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!c.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensor
+        fill(AccessorType(a), 0);
+
+        if(add_bias)
+        {
+            ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+            // Allocate bias tensor
+            b.allocator()->allocate();
+
+            ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+            // Fill tensor
+            fill(AccessorType(b), 1);
+        }
+
+        // Compute output stage function
+        output_stage.run();
+        return c;
+    }
+
+    SimpleTensor<uint8_t> compute_reference(const TensorShape &shape, int32_t result_fixed_point_multiplier, int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max,
+                                            bool add_bias)
+    {
+        // Create reference
+        TensorShape shape_bias(shape[0]);
+
+        SimpleTensor<int32_t> a{ shape, DataType::S32, 1 };
+        SimpleTensor<int32_t> b{ shape_bias, DataType::S32, 1 };
+
+        // Fill reference
+        fill(a, 0);
+
+        if(add_bias)
+        {
+            // Fill bias
+            fill(b, 1);
+
+            return reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint<int32_t>(a, b, result_fixed_point_multiplier, result_shift, result_offset_after_shift, min, max);
+        }
+        else
+        {
+            return reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint<int32_t>(a, result_fixed_point_multiplier, result_shift, result_offset_after_shift, min, max);
+        }
+    }
+
+    TensorType            _target{};
+    SimpleTensor<uint8_t> _reference{};
+};
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE */
+#endif /* ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE */
\ No newline at end of file
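For reference, the matrix-multiply-core fixture above in effect accumulates sums of (a + a_offset) * (b + b_offset) into s32, and the two output-stage fixtures then quantize an s32 accumulator back to QASYMM8. A per-element sketch of the fixed-point variant follows; the arithmetic is written out inline under the assumption of standard gemmlowp-style requantization (saturation and exact tie-breaking corner cases are glossed over) and is not a library call:

#include <algorithm>
#include <cstdint>

// Sketch: requantize one s32 accumulator to u8 (assumed gemmlowp-style fixed-point scaling).
inline uint8_t quantize_down_fixedpoint_sketch(int32_t acc, int32_t bias, int32_t multiplier,
                                               int32_t shift, int32_t offset_after_shift,
                                               int32_t min, int32_t max)
{
    int64_t v = static_cast<int64_t>(acc) + bias;
    // Rounding doubling high multiply by the fixed-point multiplier (saturation ignored here)
    v = (v * multiplier + (int64_t{ 1 } << 30)) >> 31;
    // Rounding arithmetic shift right by the requested result shift
    if(shift > 0)
    {
        v = (v + (int64_t{ 1 } << (shift - 1))) >> shift;
    }
    v += offset_after_shift;
    // Optional clamping to the [min, max] bound requested by the test, then to the u8 range
    if(min != max)
    {
        v = std::max<int64_t>(min, std::min<int64_t>(max, v));
    }
    return static_cast<uint8_t>(std::max<int64_t>(0, std::min<int64_t>(255, v)));
}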
diff --git a/tests/validation/fixtures/Gaussian3x3Fixture.h b/tests/validation/fixtures/Gaussian3x3Fixture.h
index 3b15921..396e63e 100644
--- a/tests/validation/fixtures/Gaussian3x3Fixture.h
+++ b/tests/validation/fixtures/Gaussian3x3Fixture.h
@@ -31,7 +31,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/Gaussian3x3.h"
+#include "tests/validation/reference/Gaussian3x3.h"
 
 #include <random>
 
@@ -52,8 +52,9 @@
         std::uniform_int_distribution<uint8_t> distribution(0, 255);
         const uint8_t                          constant_border_value = distribution(gen);
 
-        _target    = compute_target(shape, data_type, border_mode, constant_border_value);
-        _reference = compute_reference(shape, data_type, border_mode, constant_border_value);
+        _border_mode = border_mode;
+        _target      = compute_target(shape, data_type, border_mode, constant_border_value);
+        _reference   = compute_reference(shape, data_type, border_mode, constant_border_value);
     }
 
 protected:
diff --git a/tests/validation/fixtures/Gaussian5x5Fixture.h b/tests/validation/fixtures/Gaussian5x5Fixture.h
index b1f7f28..31d7acf 100644
--- a/tests/validation/fixtures/Gaussian5x5Fixture.h
+++ b/tests/validation/fixtures/Gaussian5x5Fixture.h
@@ -31,7 +31,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/Gaussian5x5.h"
+#include "tests/validation/reference/Gaussian5x5.h"
 
 #include <random>
 
@@ -52,8 +52,9 @@
         std::uniform_int_distribution<uint8_t> distribution(0, 255);
         const uint8_t                          constant_border_value = distribution(gen);
 
-        _target    = compute_target(shape, data_type, border_mode, constant_border_value);
-        _reference = compute_reference(shape, data_type, border_mode, constant_border_value);
+        _border_mode = border_mode;
+        _target      = compute_target(shape, data_type, border_mode, constant_border_value);
+        _reference   = compute_reference(shape, data_type, border_mode, constant_border_value);
     }
 
 protected:
diff --git a/tests/validation/fixtures/GaussianPyramidHalfFixture.h b/tests/validation/fixtures/GaussianPyramidHalfFixture.h
new file mode 100644
index 0000000..ef7657a
--- /dev/null
+++ b/tests/validation/fixtures/GaussianPyramidHalfFixture.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_GAUSSIAN_PYRAMID_HALF_FIXTURE
+#define ARM_COMPUTE_TEST_GAUSSIAN_PYRAMID_HALF_FIXTURE
+
+#include "arm_compute/core/IPyramid.h"
+#include "arm_compute/core/PyramidInfo.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/GaussianPyramidHalf.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename PyramidType>
+class GaussianPyramidHalfValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, BorderMode border_mode, size_t num_levels)
+    {
+        std::mt19937                           gen(library->seed());
+        std::uniform_int_distribution<uint8_t> distribution(0, 255);
+        const uint8_t                          constant_border_value = distribution(gen);
+
+        _border_mode = border_mode;
+
+        // Compute target and reference
+        compute_target(shape, border_mode, constant_border_value, num_levels);
+        compute_reference(shape, border_mode, constant_border_value, num_levels);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor)
+    {
+        library->fill_tensor_uniform(tensor, 0);
+    }
+
+    void compute_target(const TensorShape &shape, BorderMode border_mode, uint8_t constant_border_value, size_t num_levels)
+    {
+        // Create tensors
+        TensorType src = create_tensor<TensorType>(shape, DataType::U8);
+
+        PyramidInfo pyramid_info(num_levels, SCALE_PYRAMID_HALF, shape, Format::U8);
+        _target.init(pyramid_info);
+
+        // Create and configure function
+        FunctionType gaussian_pyramid;
+
+        gaussian_pyramid.configure(&src, &_target, border_mode, constant_border_value);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        for(size_t i = 0; i < pyramid_info.num_levels(); ++i)
+        {
+            ARM_COMPUTE_EXPECT(_target.get_pyramid_level(i)->info()->is_resizable(), framework::LogLevel::ERRORS);
+        }
+
+        // Allocate input tensor
+        src.allocator()->allocate();
+
+        // Allocate pyramid
+        _target.allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        for(size_t i = 0; i < pyramid_info.num_levels(); ++i)
+        {
+            ARM_COMPUTE_EXPECT(!_target.get_pyramid_level(i)->info()->is_resizable(), framework::LogLevel::ERRORS);
+        }
+
+        // Fill tensors
+        fill(AccessorType(src));
+
+        // Compute function
+        gaussian_pyramid.run();
+    }
+
+    void compute_reference(const TensorShape &shape, BorderMode border_mode, uint8_t constant_border_value, size_t num_levels)
+    {
+        // Create reference
+        SimpleTensor<T> src{ shape, DataType::U8 };
+
+        // Fill reference
+        fill(src);
+
+        _reference = reference::gaussian_pyramid_half<T>(src, border_mode, constant_border_value, num_levels);
+    }
+
+    PyramidType                  _target{};
+    std::vector<SimpleTensor<T>> _reference{};
+    BorderMode                   _border_mode{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_GAUSSIAN_PYRAMID_HALF_FIXTURE */
\ No newline at end of file
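For reference, the half-scale Gaussian pyramid validated here is, level by level, a 5x5 Gaussian filter followed by a factor-two downsample. The sketch below assumes the reference::gaussian5x5 helper referenced elsewhere in this patch and an illustrative downsample_by_2 helper; it is not the reference implementation:

// Sketch of the expected pyramid contents (level 0 is the input image itself).
std::vector<SimpleTensor<T>> pyramid;
pyramid.push_back(src);
for(size_t level = 1; level < num_levels; ++level)
{
    const SimpleTensor<T> smoothed = reference::gaussian5x5(pyramid[level - 1], border_mode, constant_border_value);
    pyramid.push_back(downsample_by_2(smoothed)); // downsample_by_2: illustrative helper, not a library call
}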
diff --git a/tests/validation/fixtures/HarrisCornersFixture.h b/tests/validation/fixtures/HarrisCornersFixture.h
index 6401905..d78845b 100644
--- a/tests/validation/fixtures/HarrisCornersFixture.h
+++ b/tests/validation/fixtures/HarrisCornersFixture.h
@@ -30,8 +30,8 @@
 #include "tests/Globals.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/HarrisCornerDetector.h"
 #include "tests/validation/Helpers.h"
+#include "tests/validation/reference/HarrisCornerDetector.h"
 
 namespace arm_compute
 {
@@ -51,7 +51,7 @@
     {
         HarrisCornersParameters params = harris_corners_parameters();
 
-        _target    = compute_target(shape, gradient_size, block_size, border_mode, use_fp16, format, params);
+        _target = compute_target(shape, gradient_size, block_size, border_mode, use_fp16, format, params);
         _reference = compute_reference(shape, gradient_size, block_size, border_mode, format, params);
     }
 
diff --git a/tests/validation/fixtures/HistogramFixture.h b/tests/validation/fixtures/HistogramFixture.h
new file mode 100644
index 0000000..7349bdf
--- /dev/null
+++ b/tests/validation/fixtures/HistogramFixture.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_HISTOGRAM_FIXTURE
+#define ARM_COMPUTE_TEST_HISTOGRAM_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/Histogram.h"
+#include "utils/Utils.h"
+
+#include <random>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename DistributionType>
+class HistogramValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type)
+    {
+        std::mt19937                            gen(library->seed());
+        std::uniform_int_distribution<size_t>   distribution_size_t(1, 30);
+        const size_t                            num_bins = distribution_size_t(gen);
+        std::uniform_int_distribution<int32_t>  distribution_int32_t(0, 125);
+        const size_t                            offset = distribution_int32_t(gen);
+        std::uniform_int_distribution<uint32_t> distribution_uint32_t(1, 255 - offset);
+        const size_t                            range = distribution_uint32_t(gen);
+
+        _target    = compute_target(shape, data_type, num_bins, offset, range);
+        _reference = compute_reference(shape, data_type, num_bins, offset, range);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor)
+    {
+        library->fill_tensor_uniform(tensor, 0);
+    }
+
+    TensorType compute_target(const TensorShape &shape, DataType data_type, size_t num_bins, int32_t offset, uint32_t range)
+    {
+        // Create tensors
+        TensorType       src = create_tensor<TensorType>(shape, data_type);
+        TensorType       dst = create_tensor<TensorType>(TensorShape(num_bins), DataType::U32);
+        DistributionType distribution_dst(num_bins, offset, range);
+
+        // Create and configure function
+        FunctionType histogram;
+        histogram.configure(&src, &distribution_dst);
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src));
+
+        // Compute function
+        histogram.run();
+
+        // Copy the distribution into a tensor
+        arm_compute::utils::map(distribution_dst, true);
+        AccessorType accessor_dst = AccessorType(dst);
+        uint32_t    *dst_data     = static_cast<uint32_t *>(accessor_dst.data());
+
+        ARM_COMPUTE_EXPECT(accessor_dst.size() <= dst.info()->total_size(), framework::LogLevel::ERRORS);
+
+        std::copy_n(distribution_dst.buffer(), num_bins, dst_data);
+        arm_compute::utils::unmap(distribution_dst);
+        return dst;
+    }
+
+    SimpleTensor<uint32_t> compute_reference(const TensorShape &shape, DataType data_type, size_t num_bins, int32_t offset, uint32_t range)
+    {
+        ARM_COMPUTE_ERROR_ON(data_type != DataType::U8);
+
+        // Create reference
+        SimpleTensor<T> src{ shape, data_type };
+
+        // Fill reference
+        fill(src);
+
+        // Compute reference
+        return reference::histogram<T>(src, num_bins, offset, range);
+    }
+
+    TensorType             _target{};
+    SimpleTensor<uint32_t> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_HISTOGRAM_FIXTURE */
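For reference, the binning rule exercised by this fixture follows the usual OpenVX-style histogram semantics: a pixel p falls into bin (p - offset) * num_bins / range when offset <= p < offset + range, and is ignored otherwise. A minimal sketch under those assumptions:

#include <cstdint>
#include <vector>

// Sketch: accumulate one U8 pixel into a num_bins-sized histogram (assumed OpenVX-style semantics).
inline void accumulate_pixel_sketch(std::vector<uint32_t> &hist, uint8_t p, size_t num_bins, int32_t offset, uint32_t range)
{
    if(p >= offset && p < offset + static_cast<int32_t>(range))
    {
        hist[((p - offset) * num_bins) / range]++;
    }
}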
diff --git a/tests/validation/fixtures/IntegralImageFixture.h b/tests/validation/fixtures/IntegralImageFixture.h
index 2f0ec51..8d2149e 100644
--- a/tests/validation/fixtures/IntegralImageFixture.h
+++ b/tests/validation/fixtures/IntegralImageFixture.h
@@ -31,7 +31,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/IntegralImage.h"
+#include "tests/validation/reference/IntegralImage.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/fixtures/L2NormalizeFixture.h b/tests/validation/fixtures/L2NormalizeLayerFixture.h
similarity index 96%
rename from tests/validation/fixtures/L2NormalizeFixture.h
rename to tests/validation/fixtures/L2NormalizeLayerFixture.h
index e611393..6f11dcb 100644
--- a/tests/validation/fixtures/L2NormalizeFixture.h
+++ b/tests/validation/fixtures/L2NormalizeLayerFixture.h
@@ -32,7 +32,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/L2Normalize.h"
+#include "tests/validation/reference/L2NormalizeLayer.h"
 
 namespace arm_compute
 {
@@ -41,7 +41,7 @@
 namespace validation
 {
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class L2NormalizeValidationFixture : public framework::Fixture
+class L2NormalizeLayerValidationFixture : public framework::Fixture
 {
 public:
     template <typename...>
diff --git a/tests/validation/fixtures/MagnitudeFixture.h b/tests/validation/fixtures/MagnitudeFixture.h
new file mode 100644
index 0000000..1c52907
--- /dev/null
+++ b/tests/validation/fixtures/MagnitudeFixture.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_MAGNITUDE_FIXTURE
+#define ARM_COMPUTE_TEST_MAGNITUDE_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/Magnitude.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class MagnitudeValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, Format format, MagnitudeType magnitude_type, bool use_fp16)
+    {
+        _target         = compute_target(shape, format, magnitude_type, use_fp16);
+        _reference      = compute_reference(shape, format, magnitude_type);
+        _magnitude_type = magnitude_type;
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, std::random_device::result_type seed_offset)
+    {
+        library->fill_tensor_uniform(tensor, seed_offset);
+    }
+
+    TensorType compute_target(const TensorShape &shape, Format format, MagnitudeType magnitude_type, bool use_fp16)
+    {
+        DataType data_type = data_type_from_format(format);
+
+        // Create tensors
+        TensorType src1 = create_tensor<TensorType>(shape, data_type);
+        src1.info()->set_format(format);
+
+        TensorType src2 = create_tensor<TensorType>(shape, data_type);
+        src2.info()->set_format(format);
+
+        TensorType dst = create_tensor<TensorType>(shape, data_type);
+        dst.info()->set_format(format);
+
+        // Create and configure function
+        FunctionType magnitude;
+        magnitude.configure(&src1, &src2, &dst, magnitude_type, use_fp16);
+
+        ARM_COMPUTE_EXPECT(src1.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(src2.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src1.allocator()->allocate();
+        src2.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src1.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!src2.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src1), 0);
+        fill(AccessorType(src2), 1);
+
+        // Compute function
+        magnitude.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &shape, Format format, MagnitudeType magnitude_type)
+    {
+        DataType data_type = data_type_from_format(format);
+
+        // Create reference
+        SimpleTensor<T> src1{ shape, data_type };
+        SimpleTensor<T> src2{ shape, data_type };
+
+        // Fill reference
+        fill(src1, 0);
+        fill(src2, 1);
+
+        return reference::magnitude<T>(src1, src2, magnitude_type);
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+    MagnitudeType   _magnitude_type{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_MAGNITUDE_FIXTURE */
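For reference, the per-element magnitude being validated is |gx| + |gy| for MagnitudeType::L1NORM and round(sqrt(gx^2 + gy^2)) for MagnitudeType::L2NORM, saturated to the output format. A sketch for S16 gradients, under those assumed semantics:

#include <algorithm>
#include <cmath>
#include <cstdint>

#include "arm_compute/core/Types.h" // for arm_compute::MagnitudeType

// Sketch: magnitude of one S16 gradient pair, saturated to S16 (assumed semantics, not the reference code).
inline int16_t magnitude_sketch(int16_t gx, int16_t gy, arm_compute::MagnitudeType type)
{
    const int32_t mag = (type == arm_compute::MagnitudeType::L1NORM)
                        ? std::abs(static_cast<int32_t>(gx)) + std::abs(static_cast<int32_t>(gy))
                        : static_cast<int32_t>(std::lround(std::sqrt(static_cast<double>(gx) * gx + static_cast<double>(gy) * gy)));
    return static_cast<int16_t>(std::min(mag, 32767));
}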
diff --git a/tests/validation/fixtures/MeanStdDevFixture.h b/tests/validation/fixtures/MeanStdDevFixture.h
index 37f538b..17dfe78 100644
--- a/tests/validation/fixtures/MeanStdDevFixture.h
+++ b/tests/validation/fixtures/MeanStdDevFixture.h
@@ -27,7 +27,7 @@
 #include "tests/Globals.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/MeanStdDev.h"
+#include "tests/validation/reference/MeanStdDev.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/fixtures/L2NormalizeFixture.h b/tests/validation/fixtures/Median3x3Fixture.h
similarity index 69%
copy from tests/validation/fixtures/L2NormalizeFixture.h
copy to tests/validation/fixtures/Median3x3Fixture.h
index e611393..0946358 100644
--- a/tests/validation/fixtures/L2NormalizeFixture.h
+++ b/tests/validation/fixtures/Median3x3Fixture.h
@@ -21,18 +21,19 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE
-#define ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE
+#ifndef ARM_COMPUTE_TEST_MEDIAN3X3_FIXTURE
+#define ARM_COMPUTE_TEST_MEDIAN3X3_FIXTURE
 
 #include "arm_compute/core/TensorShape.h"
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/Tensor.h"
 #include "tests/AssetsLibrary.h"
 #include "tests/Globals.h"
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/L2Normalize.h"
+#include "tests/validation/reference/Median3x3.h"
+
+#include <random>
 
 namespace arm_compute
 {
@@ -41,14 +42,19 @@
 namespace validation
 {
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class L2NormalizeValidationFixture : public framework::Fixture
+class Median3x3ValidationFixture : public framework::Fixture
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type, unsigned int axis, float epsilon)
+    void setup(TensorShape shape, DataType data_type, BorderMode border_mode)
     {
-        _target    = compute_target(shape, data_type, axis, epsilon);
-        _reference = compute_reference(shape, data_type, axis, epsilon);
+        std::mt19937                           gen(library->seed());
+        std::uniform_int_distribution<uint8_t> distribution(0, 255);
+        const uint8_t                          constant_border_value = distribution(gen);
+
+        _border_mode = border_mode;
+        _target      = compute_target(shape, data_type, border_mode, constant_border_value);
+        _reference   = compute_reference(shape, data_type, border_mode, constant_border_value);
     }
 
 protected:
@@ -58,15 +64,15 @@
         library->fill_tensor_uniform(tensor, 0);
     }
 
-    TensorType compute_target(const TensorShape &shape, DataType data_type, unsigned int axis, float epsilon)
+    TensorType compute_target(const TensorShape &shape, DataType data_type, BorderMode border_mode, uint8_t constant_border_value)
     {
         // Create tensors
         TensorType src = create_tensor<TensorType>(shape, data_type);
         TensorType dst = create_tensor<TensorType>(shape, data_type);
 
         // Create and configure function
-        FunctionType l2_norm_func;
-        l2_norm_func.configure(&src, &dst, axis, epsilon);
+        FunctionType median3x3;
+        median3x3.configure(&src, &dst, border_mode, constant_border_value);
 
         ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -82,26 +88,30 @@
         fill(AccessorType(src));
 
         // Compute function
-        l2_norm_func.run();
+        median3x3.run();
 
         return dst;
     }
 
-    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, unsigned int axis, float epsilon)
+    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, BorderMode border_mode, uint8_t constant_border_value)
     {
+        ARM_COMPUTE_ERROR_ON(data_type != DataType::U8);
+
         // Create reference
         SimpleTensor<T> src{ shape, data_type };
 
         // Fill reference
         fill(src);
 
-        return reference::l2_normalize<T>(src, axis, epsilon);
+        // Compute reference
+        return reference::median3x3<T>(src, border_mode, constant_border_value);
     }
 
+    BorderMode      _border_mode{};
     TensorType      _target{};
     SimpleTensor<T> _reference{};
 };
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE */
+#endif /* ARM_COMPUTE_TEST_MEDIAN3X3_FIXTURE */
diff --git a/tests/validation/fixtures/MinMaxLocationFixture.h b/tests/validation/fixtures/MinMaxLocationFixture.h
index bf076ef..120a5c4 100644
--- a/tests/validation/fixtures/MinMaxLocationFixture.h
+++ b/tests/validation/fixtures/MinMaxLocationFixture.h
@@ -31,7 +31,7 @@
 #include "tests/Types.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/MinMaxLocation.h"
+#include "tests/validation/reference/MinMaxLocation.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/fixtures/NonLinearFilterFixture.h b/tests/validation/fixtures/NonLinearFilterFixture.h
index 2f22873..78ba0ea 100644
--- a/tests/validation/fixtures/NonLinearFilterFixture.h
+++ b/tests/validation/fixtures/NonLinearFilterFixture.h
@@ -31,8 +31,8 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/NonLinearFilter.h"
 #include "tests/validation/Helpers.h"
+#include "tests/validation/reference/NonLinearFilter.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/fixtures/NormalizationLayerFixture.h b/tests/validation/fixtures/NormalizationLayerFixture.h
index 696d14f..e7d83c7 100644
--- a/tests/validation/fixtures/NormalizationLayerFixture.h
+++ b/tests/validation/fixtures/NormalizationLayerFixture.h
@@ -32,7 +32,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/NormalizationLayer.h"
+#include "tests/validation/reference/NormalizationLayer.h"
 
 #include <random>
 
@@ -47,10 +47,10 @@
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, NormType norm_type, int norm_size, float beta, DataType data_type, int fractional_bits)
+    void setup(TensorShape shape, NormType norm_type, int norm_size, float beta, bool is_scaled, DataType data_type, int fractional_bits)
     {
         _fractional_bits = fractional_bits;
-        NormalizationLayerInfo info(norm_type, norm_size, 5, beta);
+        NormalizationLayerInfo info(norm_type, norm_size, 5, beta, 1.f, is_scaled);
 
         _target    = compute_target(shape, info, data_type, fractional_bits);
         _reference = compute_reference(shape, info, data_type, fractional_bits);
@@ -122,9 +122,9 @@
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, NormType norm_type, int norm_size, float beta, DataType data_type)
+    void setup(TensorShape shape, NormType norm_type, int norm_size, float beta, bool is_scaled, DataType data_type)
     {
-        NormalizationValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, norm_type, norm_size, beta, data_type, 0);
+        NormalizationValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, norm_type, norm_size, beta, is_scaled, data_type, 0);
     }
 };
 } // namespace validation
diff --git a/tests/validation/fixtures/L2NormalizeFixture.h b/tests/validation/fixtures/PermuteFixture.h
similarity index 76%
copy from tests/validation/fixtures/L2NormalizeFixture.h
copy to tests/validation/fixtures/PermuteFixture.h
index e611393..3aae384 100644
--- a/tests/validation/fixtures/L2NormalizeFixture.h
+++ b/tests/validation/fixtures/PermuteFixture.h
@@ -21,9 +21,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE
-#define ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE
+#ifndef ARM_COMPUTE_TEST_PERMUTE_FIXTURE
+#define ARM_COMPUTE_TEST_PERMUTE_FIXTURE
 
+#include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/TensorShape.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/Tensor.h"
@@ -32,7 +33,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/L2Normalize.h"
+#include "tests/validation/reference/Permute.h"
 
 namespace arm_compute
 {
@@ -41,14 +42,14 @@
 namespace validation
 {
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class L2NormalizeValidationFixture : public framework::Fixture
+class PermuteValidationFixture : public framework::Fixture
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type, unsigned int axis, float epsilon)
+    void setup(TensorShape shape, PermutationVector perm, DataType data_type)
     {
-        _target    = compute_target(shape, data_type, axis, epsilon);
-        _reference = compute_reference(shape, data_type, axis, epsilon);
+        _target    = compute_target(shape, data_type, perm);
+        _reference = compute_reference(shape, data_type, perm);
     }
 
 protected:
@@ -58,15 +59,19 @@
         library->fill_tensor_uniform(tensor, 0);
     }
 
-    TensorType compute_target(const TensorShape &shape, DataType data_type, unsigned int axis, float epsilon)
+    TensorType compute_target(const TensorShape &shape, DataType data_type, PermutationVector perm)
     {
+        // Permute shapes
+        TensorShape output_shape = shape;
+        permute(output_shape, perm);
+
         // Create tensors
         TensorType src = create_tensor<TensorType>(shape, data_type);
-        TensorType dst = create_tensor<TensorType>(shape, data_type);
+        TensorType dst = create_tensor<TensorType>(output_shape, data_type);
 
         // Create and configure function
-        FunctionType l2_norm_func;
-        l2_norm_func.configure(&src, &dst, axis, epsilon);
+        FunctionType perm_func;
+        perm_func.configure(&src, &dst, perm);
 
         ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -82,12 +87,12 @@
         fill(AccessorType(src));
 
         // Compute function
-        l2_norm_func.run();
+        perm_func.run();
 
         return dst;
     }
 
-    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, unsigned int axis, float epsilon)
+    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, PermutationVector perm)
     {
         // Create reference
         SimpleTensor<T> src{ shape, data_type };
@@ -95,7 +100,7 @@
         // Fill reference
         fill(src);
 
-        return reference::l2_normalize<T>(src, axis, epsilon);
+        return reference::permute<T>(src, perm);
     }
 
     TensorType      _target{};
@@ -104,4 +109,4 @@
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE */
+#endif /* ARM_COMPUTE_TEST_PERMUTE_FIXTURE */
diff --git a/tests/validation/fixtures/PhaseFixture.h b/tests/validation/fixtures/PhaseFixture.h
new file mode 100644
index 0000000..09badcf
--- /dev/null
+++ b/tests/validation/fixtures/PhaseFixture.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_PHASE_FIXTURE
+#define ARM_COMPUTE_TEST_PHASE_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/Phase.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class PhaseValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, Format format, PhaseType phase_type)
+    {
+        _target    = compute_target(shape, format, phase_type);
+        _reference = compute_reference(shape, format, phase_type);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, std::random_device::result_type seed_offset)
+    {
+        library->fill_tensor_uniform(tensor, seed_offset);
+    }
+
+    TensorType compute_target(const TensorShape &shape, Format format, PhaseType phase_type)
+    {
+        DataType data_type = data_type_from_format(format);
+
+        // Create tensors
+        TensorType src1 = create_tensor<TensorType>(shape, data_type);
+        src1.info()->set_format(format);
+
+        TensorType src2 = create_tensor<TensorType>(shape, data_type);
+        src2.info()->set_format(format);
+
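+        // Phase output is always an 8-bit image, so the destination tensor is created as U8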
+        TensorType dst = create_tensor<TensorType>(shape, DataType::U8);
+        dst.info()->set_format(Format::U8);
+
+        // Create and configure function
+        FunctionType phase;
+
+        phase.configure(&src1, &src2, &dst, phase_type);
+
+        ARM_COMPUTE_EXPECT(src1.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(src2.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src1.allocator()->allocate();
+        src2.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src1.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!src2.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src1), 0);
+        fill(AccessorType(src2), 1);
+
+        // Compute function
+        phase.run();
+
+        return dst;
+    }
+
+    SimpleTensor<uint8_t> compute_reference(const TensorShape &shape, Format format, PhaseType phase_type)
+    {
+        DataType data_type = data_type_from_format(format);
+
+        // Create reference
+        SimpleTensor<T> src1{ shape, data_type };
+        SimpleTensor<T> src2{ shape, data_type };
+
+        // Fill reference
+        fill(src1, 0);
+        fill(src2, 1);
+
+        return reference::phase<T>(src1, src2, phase_type);
+    }
+
+    TensorType            _target{};
+    SimpleTensor<uint8_t> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_PHASE_FIXTURE */
diff --git a/tests/validation/fixtures/PixelWiseMultiplicationFixture.h b/tests/validation/fixtures/PixelWiseMultiplicationFixture.h
new file mode 100644
index 0000000..7428fb5
--- /dev/null
+++ b/tests/validation/fixtures/PixelWiseMultiplicationFixture.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_PIXEL_WISE_MULTIPLICATION_FIXTURE
+#define ARM_COMPUTE_TEST_PIXEL_WISE_MULTIPLICATION_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/PixelWiseMultiplication.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
+class PixelWiseMultiplicationValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape    shape,
+               DataType       dt_in1,
+               DataType       dt_in2,
+               float          scale,
+               ConvertPolicy  convert_policy,
+               RoundingPolicy rounding_policy)
+    {
+        _target    = compute_target(shape, dt_in1, dt_in2, scale, convert_policy, rounding_policy);
+        _reference = compute_reference(shape, dt_in1, dt_in2, scale, convert_policy, rounding_policy);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, unsigned int seed_offset)
+    {
+        library->fill_tensor_uniform(tensor, seed_offset);
+    }
+
+    TensorType compute_target(const TensorShape &shape, DataType dt_in1, DataType dt_in2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy)
+    {
+        // Create tensors
+        TensorType src1 = create_tensor<TensorType>(shape, dt_in1);
+        TensorType src2 = create_tensor<TensorType>(shape, dt_in2);
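+        // The destination takes the data type of the second input (T2), matching the reference output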
+        TensorType dst  = create_tensor<TensorType>(shape, dt_in2);
+
+        // Create and configure function
+        FunctionType multiply;
+        multiply.configure(&src1, &src2, &dst, scale, convert_policy, rounding_policy);
+
+        ARM_COMPUTE_EXPECT(src1.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(src2.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src1.allocator()->allocate();
+        src2.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src1.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!src2.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src1), 0);
+        fill(AccessorType(src2), 1);
+
+        // Compute function
+        multiply.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T2> compute_reference(const TensorShape &shape, DataType dt_in1, DataType dt_in2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy)
+    {
+        // Create reference
+        SimpleTensor<T1> src1{ shape, dt_in1 };
+        SimpleTensor<T2> src2{ shape, dt_in2 };
+
+        // Fill reference
+        fill(src1, 0);
+        fill(src2, 1);
+
+        return reference::pixel_wise_multiplication<T1, T2>(src1, src2, scale, convert_policy, rounding_policy);
+    }
+
+    TensorType       _target{};
+    SimpleTensor<T2> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_PIXEL_WISE_MULTIPLICATION_FIXTURE */
diff --git a/tests/validation/fixtures/PoolingLayerFixture.h b/tests/validation/fixtures/PoolingLayerFixture.h
index 775c412..890eef2 100644
--- a/tests/validation/fixtures/PoolingLayerFixture.h
+++ b/tests/validation/fixtures/PoolingLayerFixture.h
@@ -32,7 +32,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/PoolingLayer.h"
+#include "tests/validation/reference/PoolingLayer.h"
 
 #include <random>
 
@@ -43,28 +43,33 @@
 namespace validation
 {
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class PoolingLayerValidationFixedPointFixture : public framework::Fixture
+class PoolingLayerValidationGenericFixture : public framework::Fixture
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, PoolingType pool_type, int pool_size, PadStrideInfo pad_stride_info, DataType data_type, int fractional_bits)
+    void setup(TensorShape shape, PoolingLayerInfo pool_info, DataType data_type, int fractional_bits, QuantizationInfo quantization_info)
     {
-        _fractional_bits = fractional_bits;
-        PoolingLayerInfo info(pool_type, pool_size, pad_stride_info);
+        _fractional_bits   = fractional_bits;
+        _quantization_info = quantization_info;
+        _pool_info         = pool_info;
 
-        _target    = compute_target(shape, info, data_type, fractional_bits);
-        _reference = compute_reference(shape, info, data_type, fractional_bits);
+        _target    = compute_target(shape, pool_info, data_type, fractional_bits, quantization_info);
+        _reference = compute_reference(shape, pool_info, data_type, fractional_bits, quantization_info);
     }
 
 protected:
     template <typename U>
     void fill(U &&tensor)
     {
-        if(_fractional_bits == 0)
+        if(!is_data_type_quantized(tensor.data_type()))
         {
             std::uniform_real_distribution<> distribution(-1.f, 1.f);
             library->fill(tensor, distribution, 0);
         }
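+        // Quantized asymmetric (QASYMM8) inputs: fill with uniformly distributed raw quantized values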
+        else if(is_data_type_quantized_asymmetric(tensor.data_type()))
+        {
+            library->fill_tensor_uniform(tensor, 0);
+        }
         else
         {
             const int                       one_fixed = 1 << _fractional_bits;
@@ -73,10 +78,11 @@
         }
     }
 
-    TensorType compute_target(const TensorShape &shape, PoolingLayerInfo info, DataType data_type, int fixed_point_position = 0)
+    TensorType compute_target(const TensorShape &shape, PoolingLayerInfo info,
+                              DataType data_type, int fixed_point_position, QuantizationInfo quantization_info)
     {
         // Create tensors
-        TensorType src = create_tensor<TensorType>(shape, data_type, 1, fixed_point_position);
+        TensorType src = create_tensor<TensorType>(shape, data_type, 1, fixed_point_position, quantization_info);
         TensorType dst;
 
         // Create and configure function
@@ -102,10 +108,11 @@
         return dst;
     }
 
-    SimpleTensor<T> compute_reference(const TensorShape &shape, PoolingLayerInfo info, DataType data_type, int fixed_point_position = 0)
+    SimpleTensor<T> compute_reference(const TensorShape &shape, PoolingLayerInfo info,
+                                      DataType data_type, int fixed_point_position, QuantizationInfo quantization_info)
     {
         // Create reference
-        SimpleTensor<T> src{ shape, data_type, 1, fixed_point_position };
+        SimpleTensor<T> src{ shape, data_type, 1, fixed_point_position, quantization_info };
 
         // Fill reference
         fill(src);
@@ -113,30 +120,57 @@
         return reference::pooling_layer<T>(src, info);
     }
 
-    TensorType      _target{};
-    SimpleTensor<T> _reference{};
-    int             _fractional_bits{};
+    TensorType       _target{};
+    SimpleTensor<T>  _reference{};
+    int              _fractional_bits{};
+    QuantizationInfo _quantization_info{};
+    PoolingLayerInfo _pool_info{};
 };
 
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class PoolingLayerValidationFixture : public PoolingLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>
+class PoolingLayerValidationFixture : public PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, PoolingType pool_type, int pool_size, PadStrideInfo pad_stride_info, DataType data_type)
+    void setup(TensorShape shape, PoolingType pool_type, int pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type)
     {
-        PoolingLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, pool_type, pool_size, pad_stride_info, data_type, 0);
+        PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type, pool_size, pad_stride_info, exclude_padding),
+                                                                                               data_type, 0, QuantizationInfo());
     }
 };
 
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class GlobalPoolingLayerValidationFixture : public PoolingLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>
+class PoolingLayerValidationFixedPointFixture : public PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, PoolingType pool_type, int pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, int fractional_bits)
+    {
+        PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type, pool_size, pad_stride_info, exclude_padding),
+                                                                                               data_type, fractional_bits, QuantizationInfo());
+    }
+};
+
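+// Fixture for QASYMM8 pooling: forwards the user-provided QuantizationInfo and uses no fixed-point position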
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class PoolingLayerValidationQuantizedFixture : public PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, PoolingType pool_type, int pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, QuantizationInfo quantization_info)
+    {
+        PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type, pool_size, pad_stride_info, exclude_padding),
+                                                                                               data_type, 0, quantization_info);
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class GlobalPoolingLayerValidationFixture : public PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
 {
 public:
     template <typename...>
     void setup(TensorShape shape, PoolingType pool_type, DataType data_type)
     {
-        PoolingLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, pool_type, shape.x(), PadStrideInfo(1, 1, 0, 0), data_type, 0);
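+        // PoolingLayerInfo constructed from just the pooling type performs global pooling over the full input plane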
+        PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type), data_type, 0, QuantizationInfo());
     }
 };
 } // namespace validation
diff --git a/tests/validation/fixtures/QuantizationLayerFixture.h b/tests/validation/fixtures/QuantizationLayerFixture.h
index 83ee049..6ce673a 100644
--- a/tests/validation/fixtures/QuantizationLayerFixture.h
+++ b/tests/validation/fixtures/QuantizationLayerFixture.h
@@ -32,7 +32,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/QuantizationLayer.h"
+#include "tests/validation/reference/QuantizationLayer.h"
 
 #include <random>
 
diff --git a/tests/validation/fixtures/ReductionOperationFixture.h b/tests/validation/fixtures/ReductionOperationFixture.h
index 7c871ae..6fa5f0c 100644
--- a/tests/validation/fixtures/ReductionOperationFixture.h
+++ b/tests/validation/fixtures/ReductionOperationFixture.h
@@ -32,7 +32,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/ReductionOperation.h"
+#include "tests/validation/reference/ReductionOperation.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/fixtures/RemapFixture.h b/tests/validation/fixtures/RemapFixture.h
new file mode 100644
index 0000000..846ebf4
--- /dev/null
+++ b/tests/validation/fixtures/RemapFixture.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_REMAP_FIXTURE
+#define ARM_COMPUTE_TEST_REMAP_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/Remap.h"
+
+#include <random>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class RemapValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode)
+    {
+        std::mt19937                           gen(library->seed());
+        std::uniform_int_distribution<uint8_t> distribution(0, 255);
+        T                                      constant_border_value = static_cast<T>(distribution(gen));
+
+        _target    = compute_target(shape, policy, data_type, border_mode, constant_border_value);
+        _reference = compute_reference(shape, policy, data_type, border_mode, constant_border_value);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        library->fill_tensor_uniform(tensor, i);
+    }
+
+    TensorType compute_target(const TensorShape &shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode, T constant_border_value)
+    {
+        // Create tensors
+        TensorType src   = create_tensor<TensorType>(shape, data_type);
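+        // map_x and map_y hold the remap coordinates as F32 tensors of the same shape as the input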
+        TensorType map_x = create_tensor<TensorType>(shape, DataType::F32);
+        TensorType map_y = create_tensor<TensorType>(shape, DataType::F32);
+        TensorType dst   = create_tensor<TensorType>(shape, data_type);
+
+        // Create and configure function
+        FunctionType remap;
+        remap.configure(&src, &map_x, &map_y, &dst, policy, border_mode, constant_border_value);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(map_x.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(map_y.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        map_x.allocator()->allocate();
+        map_y.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!map_x.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!map_y.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src), 0);
+        fill(AccessorType(map_x), 1);
+        fill(AccessorType(map_y), 2);
+
+        // Compute function
+        remap.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode, T constant_border_value)
+    {
+        ARM_COMPUTE_ERROR_ON(data_type != DataType::U8);
+
+        // Create reference
+        SimpleTensor<T>     src{ shape, data_type };
+        SimpleTensor<float> map_x{ shape, DataType::F32 };
+        SimpleTensor<float> map_y{ shape, DataType::F32 };
+
+        // Create the valid mask Tensor
+        _valid_mask = SimpleTensor<T> { shape, data_type };
+
+        // Fill reference
+        fill(src, 0);
+        fill(map_x, 1);
+        fill(map_y, 2);
+
+        // Compute reference
+        return reference::remap<T>(src, map_x, map_y, _valid_mask, policy, border_mode, constant_border_value);
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+    SimpleTensor<T> _valid_mask{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_REMAP_FIXTURE */
diff --git a/tests/validation/fixtures/ReshapeLayerFixture.h b/tests/validation/fixtures/ReshapeLayerFixture.h
index 435717c..2bd5148 100644
--- a/tests/validation/fixtures/ReshapeLayerFixture.h
+++ b/tests/validation/fixtures/ReshapeLayerFixture.h
@@ -31,7 +31,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/ReshapeLayer.h"
+#include "tests/validation/reference/ReshapeLayer.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/fixtures/ScaleFixture.h b/tests/validation/fixtures/ScaleFixture.h
index 476985e..fe24f5b 100644
--- a/tests/validation/fixtures/ScaleFixture.h
+++ b/tests/validation/fixtures/ScaleFixture.h
@@ -31,7 +31,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/Scale.h"
+#include "tests/validation/reference/Scale.h"
 
 namespace arm_compute
 {
@@ -44,15 +44,16 @@
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type, InterpolationPolicy policy, BorderMode border_mode)
+    void setup(TensorShape shape, DataType data_type, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy)
     {
         constexpr float max_width  = 8192.0f;
         constexpr float max_height = 6384.0f;
 
-        _shape       = shape;
-        _policy      = policy;
-        _border_mode = border_mode;
-        _data_type   = data_type;
+        _shape           = shape;
+        _policy          = policy;
+        _border_mode     = border_mode;
+        _sampling_policy = sampling_policy;
+        _data_type       = data_type;
 
         std::mt19937                          generator(library->seed());
         std::uniform_real_distribution<float> distribution_float(0.25, 3);
@@ -65,8 +66,8 @@
         std::uniform_int_distribution<uint8_t> distribution_u8(0, 255);
         T                                      constant_border_value = static_cast<T>(distribution_u8(generator));
 
-        _target    = compute_target(shape, scale_x, scale_y, policy, border_mode, constant_border_value);
-        _reference = compute_reference(shape, scale_x, scale_y, policy, border_mode, constant_border_value);
+        _target    = compute_target(shape, scale_x, scale_y, policy, border_mode, constant_border_value, sampling_policy);
+        _reference = compute_reference(shape, scale_x, scale_y, policy, border_mode, constant_border_value, sampling_policy);
     }
 
 protected:
@@ -77,7 +78,7 @@
     }
 
     TensorType compute_target(const TensorShape &shape, const float scale_x, const float scale_y,
-                              InterpolationPolicy policy, BorderMode border_mode, T constant_border_value)
+                              InterpolationPolicy policy, BorderMode border_mode, T constant_border_value, SamplingPolicy sampling_policy)
     {
         // Create tensors
         TensorType  src = create_tensor<TensorType>(shape, _data_type);
@@ -89,7 +90,7 @@
         // Create and configure function
         FunctionType scale;
 
-        scale.configure(&src, &dst, policy, border_mode, constant_border_value);
+        scale.configure(&src, &dst, policy, border_mode, constant_border_value, sampling_policy);
 
         ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -110,7 +111,7 @@
     }
 
     SimpleTensor<T> compute_reference(const TensorShape &shape, const float scale_x, const float scale_y,
-                                      InterpolationPolicy policy, BorderMode border_mode, T constant_border_value)
+                                      InterpolationPolicy policy, BorderMode border_mode, T constant_border_value, SamplingPolicy sampling_policy)
     {
         // Create reference
         SimpleTensor<T> src{ shape, _data_type };
@@ -118,7 +119,7 @@
         // Fill reference
         fill(src);
 
-        return reference::scale<T>(src, scale_x, scale_y, policy, border_mode, constant_border_value);
+        return reference::scale<T>(src, scale_x, scale_y, policy, border_mode, constant_border_value, sampling_policy);
     }
 
     TensorType          _target{};
@@ -126,6 +127,7 @@
     TensorShape         _shape{};
     InterpolationPolicy _policy{};
     BorderMode          _border_mode{};
+    SamplingPolicy      _sampling_policy{};
     DataType            _data_type{};
 };
 } // namespace validation
diff --git a/tests/validation/fixtures/ScharrFixture.h b/tests/validation/fixtures/ScharrFixture.h
new file mode 100644
index 0000000..36b8e98
--- /dev/null
+++ b/tests/validation/fixtures/ScharrFixture.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_SCHARR_FIXTURE
+#define ARM_COMPUTE_TEST_SCHARR_FIXTURE
+
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/Types.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/Scharr.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+class CLScharr3x3;
+class NEScharr3x3;
+
+namespace test
+{
+namespace validation
+{
+namespace
+{
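+// Traits describing each Scharr implementation: destination tensor format and filter size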
+template <typename Function>
+struct info;
+
+template <>
+struct info<NEScharr3x3>
+{
+    static const Format dst_format  = Format::S16;
+    static const int    filter_size = 3;
+};
+
+template <>
+struct info<CLScharr3x3>
+{
+    static const Format dst_format  = Format::S16;
+    static const int    filter_size = 3;
+};
+} // namespace
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename U>
+class ScharrValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, BorderMode border_mode, Format format, GradientDimension gradient_dimension)
+    {
+        // Generate a random constant value
+        std::mt19937                           gen(library->seed());
+        std::uniform_int_distribution<uint8_t> int_dist(0, 255);
+        const uint8_t                          constant_border_value = int_dist(gen);
+
+        _border_mode = border_mode;
+        _target      = compute_target(shape, border_mode, format, constant_border_value, gradient_dimension);
+        _reference   = compute_reference(shape, info<FunctionType>::filter_size, border_mode, format, constant_border_value, gradient_dimension);
+    }
+
+protected:
+    template <typename V>
+    void fill(V &&tensor)
+    {
+        library->fill_tensor_uniform(tensor, 0);
+    }
+
+    template <typename V>
+    void fill_zero(V &&tensor)
+    {
+        library->fill_tensor_uniform(tensor, 0, static_cast<U>(0), static_cast<U>(0));
+    }
+
+    std::pair<TensorType, TensorType> compute_target(const TensorShape &shape, BorderMode border_mode, Format format, uint8_t constant_border_value, GradientDimension gradient_dimension)
+    {
+        // Create tensors
+        TensorType src   = create_tensor<TensorType>(shape, data_type_from_format(format));
+        TensorType dst_x = create_tensor<TensorType>(shape, data_type_from_format(info<FunctionType>::dst_format));
+        TensorType dst_y = create_tensor<TensorType>(shape, data_type_from_format(info<FunctionType>::dst_format));
+
+        src.info()->set_format(format);
+        dst_x.info()->set_format(info<FunctionType>::dst_format);
+        dst_y.info()->set_format(info<FunctionType>::dst_format);
+
+        FunctionType scharr;
+
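+        // Configure only the gradient outputs requested by the test; the unused output is passed as nullptr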
+        switch(gradient_dimension)
+        {
+            case GradientDimension::GRAD_X:
+                scharr.configure(&src, &dst_x, nullptr, border_mode, constant_border_value);
+                break;
+            case GradientDimension::GRAD_Y:
+                scharr.configure(&src, nullptr, &dst_y, border_mode, constant_border_value);
+                break;
+            case GradientDimension::GRAD_XY:
+                scharr.configure(&src, &dst_x, &dst_y, border_mode, constant_border_value);
+                break;
+            default:
+                ARM_COMPUTE_ERROR("Gradient dimension not supported");
+        }
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst_x.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst_y.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        dst_x.allocator()->allocate();
+        dst_y.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst_x.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst_y.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src));
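+        // Zero-initialise the outputs so pixels the function leaves untouched have a deterministic value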
+        fill_zero(AccessorType(dst_x));
+        fill_zero(AccessorType(dst_y));
+
+        // Compute function
+        scharr.run();
+
+        return std::make_pair(std::move(dst_x), std::move(dst_y));
+    }
+
+    std::pair<SimpleTensor<U>, SimpleTensor<U>> compute_reference(const TensorShape &shape, int filter_size, BorderMode border_mode, Format format, uint8_t constant_border_value,
+                                                                  GradientDimension gradient_dimension)
+    {
+        // Create reference
+        SimpleTensor<T> src{ shape, format };
+
+        // Fill reference
+        fill(src);
+
+        return reference::scharr<U>(src, filter_size, border_mode, constant_border_value, gradient_dimension);
+    }
+
+    BorderMode                                  _border_mode{ BorderMode::UNDEFINED };
+    std::pair<TensorType, TensorType>           _target{};
+    std::pair<SimpleTensor<U>, SimpleTensor<U>> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_SCHARR_FIXTURE */
diff --git a/tests/validation/fixtures/SobelFixture.h b/tests/validation/fixtures/SobelFixture.h
index 2a592b8..f075a69 100644
--- a/tests/validation/fixtures/SobelFixture.h
+++ b/tests/validation/fixtures/SobelFixture.h
@@ -32,7 +32,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/Sobel.h"
+#include "tests/validation/reference/Sobel.h"
 
 #include <memory>
 
@@ -102,7 +102,7 @@
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, BorderMode border_mode, Format format)
+    void setup(TensorShape shape, BorderMode border_mode, Format format, GradientDimension gradient_dimension)
     {
         // Generate a random constant value
         std::mt19937                           gen(library->seed());
@@ -110,8 +110,8 @@
         const uint8_t                          constant_border_value = int_dist(gen);
 
         _border_mode = border_mode;
-        _target      = compute_target(shape, border_mode, format, constant_border_value);
-        _reference   = compute_reference(shape, info<FunctionType>::filter_size, border_mode, format, constant_border_value);
+        _target      = compute_target(shape, border_mode, format, constant_border_value, gradient_dimension);
+        _reference   = compute_reference(shape, info<FunctionType>::filter_size, border_mode, format, constant_border_value, gradient_dimension);
     }
 
 protected:
@@ -121,7 +121,7 @@
         library->fill_tensor_uniform(tensor, 0);
     }
 
-    std::pair<TensorType, TensorType> compute_target(const TensorShape &shape, BorderMode border_mode, Format format, uint8_t constant_border_value)
+    std::pair<TensorType, TensorType> compute_target(const TensorShape &shape, BorderMode border_mode, Format format, uint8_t constant_border_value, GradientDimension gradient_dimension)
     {
         // Create tensors
         TensorType src   = create_tensor<TensorType>(shape, data_type_from_format(format));
@@ -133,7 +133,21 @@
         dst_y.info()->set_format(info<FunctionType>::dst_format);
 
         FunctionType sobel;
-        sobel.configure(&src, &dst_x, &dst_y, border_mode, constant_border_value);
+
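+        // Configure only the requested gradient outputs; the remaining output pointer is nullptr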
+        switch(gradient_dimension)
+        {
+            case GradientDimension::GRAD_X:
+                sobel.configure(&src, &dst_x, nullptr, border_mode, constant_border_value);
+                break;
+            case GradientDimension::GRAD_Y:
+                sobel.configure(&src, nullptr, &dst_y, border_mode, constant_border_value);
+                break;
+            case GradientDimension::GRAD_XY:
+                sobel.configure(&src, &dst_x, &dst_y, border_mode, constant_border_value);
+                break;
+            default:
+                ARM_COMPUTE_ERROR("Gradient dimension not supported");
+        }
 
         ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(dst_x.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -157,7 +171,8 @@
         return std::make_pair(std::move(dst_x), std::move(dst_y));
     }
 
-    std::pair<SimpleTensor<U>, SimpleTensor<U>> compute_reference(const TensorShape &shape, int filter_size, BorderMode border_mode, Format format, uint8_t constant_border_value)
+    std::pair<SimpleTensor<U>, SimpleTensor<U>> compute_reference(const TensorShape &shape, int filter_size, BorderMode border_mode, Format format, uint8_t constant_border_value,
+                                                                  GradientDimension gradient_dimension)
     {
         // Create reference
         SimpleTensor<T> src{ shape, format };
@@ -165,7 +180,7 @@
         // Fill reference
         fill(src);
 
-        return reference::sobel<U>(src, filter_size, border_mode, constant_border_value);
+        return reference::sobel<U>(src, filter_size, border_mode, constant_border_value, gradient_dimension);
     }
 
     BorderMode _border_mode{ BorderMode::UNDEFINED };
diff --git a/tests/validation/fixtures/SoftmaxLayerFixture.h b/tests/validation/fixtures/SoftmaxLayerFixture.h
index 9c8f044..c2ab2e2 100644
--- a/tests/validation/fixtures/SoftmaxLayerFixture.h
+++ b/tests/validation/fixtures/SoftmaxLayerFixture.h
@@ -32,7 +32,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/SoftmaxLayer.h"
+#include "tests/validation/reference/SoftmaxLayer.h"
 
 #include <random>
 
@@ -43,27 +43,33 @@
 namespace validation
 {
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class SoftmaxValidationFixedPointFixture : public framework::Fixture
+class SoftmaxValidationGenericFixture : public framework::Fixture
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type, int fractional_bits)
+    void setup(TensorShape shape, DataType data_type, int fractional_bits, QuantizationInfo quantization_info, float beta)
     {
-        _fractional_bits = fractional_bits;
+        _fractional_bits   = fractional_bits;
+        _quantization_info = quantization_info;
 
-        _target    = compute_target(shape, data_type, fractional_bits);
-        _reference = compute_reference(shape, data_type, fractional_bits);
+        _target    = compute_target(shape, data_type, fractional_bits, quantization_info, beta);
+        _reference = compute_reference(shape, data_type, fractional_bits, quantization_info, beta);
     }
 
 protected:
     template <typename U>
     void fill(U &&tensor)
     {
-        if(_fractional_bits == 0)
+        if(!is_data_type_quantized(tensor.data_type()))
         {
             std::uniform_real_distribution<> distribution(-1000.f, 1000.f);
             library->fill(tensor, distribution, 0);
         }
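+        // QASYMM8 inputs: fill with raw quantized values in [0, 100]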
+        else if(is_data_type_quantized_asymmetric(tensor.data_type()))
+        {
+            std::uniform_int_distribution<> distribution(0, 100);
+            library->fill(tensor, distribution, 0);
+        }
         else
         {
             const int                       one_fixed = 1 << _fractional_bits;
@@ -72,15 +78,16 @@
         }
     }
 
-    TensorType compute_target(const TensorShape &shape, DataType data_type, int fixed_point_position = 0)
+    TensorType compute_target(const TensorShape &shape, DataType data_type, int fixed_point_position,
+                              QuantizationInfo quantization_info, float beta)
     {
         // Create tensors
-        TensorType src = create_tensor<TensorType>(shape, data_type, 1, fixed_point_position);
-        TensorType dst = create_tensor<TensorType>(shape, data_type, 1, fixed_point_position);
+        TensorType src = create_tensor<TensorType>(shape, data_type, 1, fixed_point_position, quantization_info);
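+        // Softmax output lies in [0, 1], so the quantized output uses a fixed scale of 1/256 with zero offset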
+        TensorType dst = create_tensor<TensorType>(shape, data_type, 1, fixed_point_position, QuantizationInfo(1.f / 256, 0));
 
         // Create and configure function
         FunctionType smx_layer;
-        smx_layer.configure(&src, &dst);
+        smx_layer.configure(&src, &dst, beta);
 
         ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -101,30 +108,66 @@
         return dst;
     }
 
-    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, int fixed_point_position = 0)
+    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, int fixed_point_position,
+                                      QuantizationInfo quantization_info, float beta)
     {
         // Create reference
-        SimpleTensor<T> src{ shape, data_type, 1, fixed_point_position };
+        SimpleTensor<T> src{ shape, data_type, 1, fixed_point_position, quantization_info };
 
         // Fill reference
         fill(src);
 
-        return reference::softmax_layer<T>(src);
+        return reference::softmax_layer<T>(src, beta);
     }
 
-    TensorType      _target{};
-    SimpleTensor<T> _reference{};
-    int             _fractional_bits{};
+    TensorType       _target{};
+    SimpleTensor<T>  _reference{};
+    int              _fractional_bits{};
+    QuantizationInfo _quantization_info{};
 };
 
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class SoftmaxValidationFixture : public SoftmaxValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>
+class SoftmaxValidationFixture : public SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type)
+    void setup(TensorShape shape, DataType data_type, float beta)
     {
-        SoftmaxValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, 0);
+        SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape,
+                                                                                          data_type,
+                                                                                          0,
+                                                                                          QuantizationInfo(),
+                                                                                          beta);
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class SoftmaxValidationFixedPointFixture : public SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type, int fixed_point_position)
+    {
+        SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape,
+                                                                                          data_type,
+                                                                                          fixed_point_position,
+                                                                                          QuantizationInfo(),
+                                                                                          1.0f);
+    }
+};
+
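+// Fixture for QASYMM8 softmax: forwards the quantization info and beta, with no fixed-point position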
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class SoftmaxValidationQuantizedFixture : public SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type, QuantizationInfo quantization_info, float beta)
+    {
+        SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape,
+                                                                                          data_type,
+                                                                                          0,
+                                                                                          quantization_info,
+                                                                                          beta);
     }
 };
 } // namespace validation
diff --git a/tests/validation/fixtures/TableLookupFixture.h b/tests/validation/fixtures/TableLookupFixture.h
index 49e0f41..6886c7e 100644
--- a/tests/validation/fixtures/TableLookupFixture.h
+++ b/tests/validation/fixtures/TableLookupFixture.h
@@ -32,8 +32,8 @@
 #include "tests/RawLutAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/TableLookup.h"
 #include "tests/validation/Helpers.h"
+#include "tests/validation/reference/TableLookup.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/fixtures/ThresholdFixture.h b/tests/validation/fixtures/ThresholdFixture.h
index 0ea43a5..9a92175 100644
--- a/tests/validation/fixtures/ThresholdFixture.h
+++ b/tests/validation/fixtures/ThresholdFixture.h
@@ -29,7 +29,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/Threshold.h"
+#include "tests/validation/reference/Threshold.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/fixtures/L2NormalizeFixture.h b/tests/validation/fixtures/TransposeFixture.h
similarity index 77%
copy from tests/validation/fixtures/L2NormalizeFixture.h
copy to tests/validation/fixtures/TransposeFixture.h
index e611393..c798162 100644
--- a/tests/validation/fixtures/L2NormalizeFixture.h
+++ b/tests/validation/fixtures/TransposeFixture.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE
-#define ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE
+#ifndef ARM_COMPUTE_TEST_TRANSPOSE_FIXTURE
+#define ARM_COMPUTE_TEST_TRANSPOSE_FIXTURE
 
 #include "arm_compute/core/TensorShape.h"
 #include "arm_compute/core/Types.h"
@@ -32,7 +32,7 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/L2Normalize.h"
+#include "tests/validation/reference/Transpose.h"
 
 namespace arm_compute
 {
@@ -41,14 +41,14 @@
 namespace validation
 {
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class L2NormalizeValidationFixture : public framework::Fixture
+class TransposeValidationFixture : public framework::Fixture
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type, unsigned int axis, float epsilon)
+    void setup(TensorShape shape, DataType data_type)
     {
-        _target    = compute_target(shape, data_type, axis, epsilon);
-        _reference = compute_reference(shape, data_type, axis, epsilon);
+        _target    = compute_target(shape, data_type);
+        _reference = compute_reference(shape, data_type);
     }
 
 protected:
@@ -58,15 +58,18 @@
         library->fill_tensor_uniform(tensor, 0);
     }
 
-    TensorType compute_target(const TensorShape &shape, DataType data_type, unsigned int axis, float epsilon)
+    TensorType compute_target(const TensorShape &shape, DataType data_type)
     {
+        // Make rows the columns of the original shape
+        TensorShape output_shape{ shape[1], shape[0] };
+
         // Create tensors
         TensorType src = create_tensor<TensorType>(shape, data_type);
-        TensorType dst = create_tensor<TensorType>(shape, data_type);
+        TensorType dst = create_tensor<TensorType>(output_shape, data_type);
 
         // Create and configure function
-        FunctionType l2_norm_func;
-        l2_norm_func.configure(&src, &dst, axis, epsilon);
+        FunctionType trans_func;
+        trans_func.configure(&src, &dst);
 
         ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -82,12 +85,12 @@
         fill(AccessorType(src));
 
         // Compute function
-        l2_norm_func.run();
+        trans_func.run();
 
         return dst;
     }
 
-    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, unsigned int axis, float epsilon)
+    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type)
     {
         // Create reference
         SimpleTensor<T> src{ shape, data_type };
@@ -95,7 +98,7 @@
         // Fill reference
         fill(src);
 
-        return reference::l2_normalize<T>(src, axis, epsilon);
+        return reference::transpose<T>(src);
     }
 
     TensorType      _target{};
@@ -104,4 +107,4 @@
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE */
+#endif /* ARM_COMPUTE_TEST_TRANSPOSE_FIXTURE */
diff --git a/tests/validation/fixtures/WarpAffineFixture.h b/tests/validation/fixtures/WarpAffineFixture.h
index fef1f6b..c027072 100644
--- a/tests/validation/fixtures/WarpAffineFixture.h
+++ b/tests/validation/fixtures/WarpAffineFixture.h
@@ -33,8 +33,8 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/Utils.h"
-#include "tests/validation/CPP/WarpAffine.h"
+#include "tests/validation/reference/Utils.h"
+#include "tests/validation/reference/WarpAffine.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/fixtures/WarpPerspectiveFixture.h b/tests/validation/fixtures/WarpPerspectiveFixture.h
index c77efbd..c804fa6 100644
--- a/tests/validation/fixtures/WarpPerspectiveFixture.h
+++ b/tests/validation/fixtures/WarpPerspectiveFixture.h
@@ -31,8 +31,8 @@
 #include "tests/IAccessor.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/Utils.h"
-#include "tests/validation/CPP/WarpPerspective.h"
+#include "tests/validation/reference/Utils.h"
+#include "tests/validation/reference/WarpPerspective.h"
 
 #include <random>
 
@@ -70,9 +70,9 @@
 
 protected:
     template <typename U>
-    void fill(U &&tensor, int i = 0)
+    void fill(U &&tensor)
     {
-        library->fill_tensor_uniform(tensor, i);
+        library->fill_tensor_uniform(tensor, 0);
     }
 
     TensorType compute_target(const TensorShape &shape, const TensorShape &vmask_shape, const float *matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value,
@@ -112,19 +112,21 @@
 
         // Create reference
         SimpleTensor<T> src{ shape, data_type };
-        SimpleTensor<T> valid_mask{ vmask_shape, data_type };
+
+        // Create the valid mask Tensor
+        _valid_mask = SimpleTensor<T>(shape, data_type);
 
         // Fill reference
-        fill(src, 0);
-        fill(valid_mask, 1);
+        fill(src);
 
         // Compute reference
-        return reference::warp_perspective<T>(src, valid_mask, matrix, policy, border_mode, constant_border_value);
+        return reference::warp_perspective<T>(src, _valid_mask, matrix, policy, border_mode, constant_border_value);
     }
 
     TensorType      _target{};
     SimpleTensor<T> _reference{};
     BorderMode      _border_mode{};
+    SimpleTensor<T> _valid_mask{};
 };
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/fixtures/WinogradLayerFixture.h b/tests/validation/fixtures/WinogradLayerFixture.h
new file mode 100644
index 0000000..7aa26c7
--- /dev/null
+++ b/tests/validation/fixtures/WinogradLayerFixture.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_WINOGRAD_LAYER_FIXTURE
+#define ARM_COMPUTE_TEST_WINOGRAD_LAYER_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/ConvolutionLayer.h"
+#include "tests/validation/reference/Utils.h"
+
+#include <random>
+
+namespace arm_compute
+{
+class NEWinogradLayer;
+
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class WinogradLayerValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info)
+    {
+        _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, info);
+        _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i, float min, float max)
+    {
+        switch(tensor.data_type())
+        {
+            case DataType::F32:
+            {
+                std::uniform_real_distribution<> distribution(min, max);
+                library->fill(tensor, distribution, i);
+                break;
+            }
+            default:
+            {
+                ARM_COMPUTE_ERROR("Not supported");
+                library->fill_tensor_uniform(tensor, i);
+                break;
+            }
+        }
+    }
+
+    TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info)
+    {
+        // Create tensors
+        TensorType src     = create_tensor<TensorType>(input_shape, DataType::F32, 1);
+        TensorType weights = create_tensor<TensorType>(weights_shape, DataType::F32, 1);
+        TensorType bias    = create_tensor<TensorType>(bias_shape, DataType::F32, 1);
+        TensorType dst     = create_tensor<TensorType>(output_shape, DataType::F32, 1);
+
+        // Create and configure function
+        FunctionType conv;
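+        // The bias tensor is allocated and zero-filled below but not passed to configure (nullptr is used instead)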
+        conv.configure(&src, &weights, nullptr, &dst, info);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        weights.allocator()->allocate();
+        bias.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src), 0, -1.f, 1.f);
+        fill(AccessorType(weights), 1, -1.f, 1.f);
+        fill(AccessorType(bias), 2, 0.f, 0.f);
+        fill(AccessorType(dst), 3, -1.f, 1.f);
+
+        // Compute NEWinogradLayer function
+        conv.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info)
+    {
+        // Create reference
+        SimpleTensor<T> src{ input_shape, DataType::F32, 1 };
+        SimpleTensor<T> weights{ weights_shape, DataType::F32, 1 };
+        SimpleTensor<T> bias{ bias_shape, DataType::F32, 1 };
+
+        // Fill reference
+        fill(src, 0, -1.f, 1.f);
+        fill(weights, 1, -1.f, 1.f);
+        fill(bias, 2, 0.f, 0.f);
+
+        return reference::convolution_layer<T>(src, weights, bias, output_shape, info);
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+    int             _fractional_bits{};
+    DataType        _data_type{};
+};
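+
+// A minimal usage sketch; the dataset, tolerance and fixture alias names below are placeholders, not part of this file:
+//   using NEWinogradLayerFixture = WinogradLayerValidationFixture<Tensor, Accessor, NEWinogradLayer, float>;
+//   FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradLayerFixture, framework::DatasetMode::PRECOMMIT, datasets::SmallWinogradLayerDataset())
+//   {
+//       validate(Accessor(_target), _reference, tolerance_f32);
+//   }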
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_WINOGRAD_LAYER_FIXTURE */
diff --git a/tests/validation/reference/AbsoluteDifference.cpp b/tests/validation/reference/AbsoluteDifference.cpp
new file mode 100644
index 0000000..f518e67
--- /dev/null
+++ b/tests/validation/reference/AbsoluteDifference.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "AbsoluteDifference.h"
+
+#include "arm_compute/core/Types.h"
+#include "tests/validation/FixedPoint.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> absolute_difference(const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, DataType dst_data_type)
+{
+    SimpleTensor<T> result(src1.shape(), dst_data_type);
+
+    using intermediate_type = typename common_promoted_signed_type<T>::intermediate_type;
+
+    for(int i = 0; i < src1.num_elements(); ++i)
+    {
+        intermediate_type val = std::abs(static_cast<intermediate_type>(src1[i]) - static_cast<intermediate_type>(src2[i]));
+        result[i]             = saturate_cast<T>(val);
+    }
+
+    return result;
+}
+
+template SimpleTensor<uint8_t> absolute_difference(const SimpleTensor<uint8_t> &src1, const SimpleTensor<uint8_t> &src2, DataType dst_data_type);
+template SimpleTensor<int16_t> absolute_difference(const SimpleTensor<int16_t> &src1, const SimpleTensor<int16_t> &src2, DataType dst_data_type);
+template SimpleTensor<int8_t> absolute_difference(const SimpleTensor<int8_t> &src1, const SimpleTensor<int8_t> &src2, DataType dst_data_type);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CPP/ArithmeticSubtraction.h b/tests/validation/reference/AbsoluteDifference.h
similarity index 81%
copy from tests/validation/CPP/ArithmeticSubtraction.h
copy to tests/validation/reference/AbsoluteDifference.h
index 18b0d12..b14da15 100644
--- a/tests/validation/CPP/ArithmeticSubtraction.h
+++ b/tests/validation/reference/AbsoluteDifference.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_ARITHMETIC_SUBTRACTION_H__
-#define __ARM_COMPUTE_TEST_ARITHMETIC_SUBTRACTION_H__
+#ifndef __ARM_COMPUTE_TEST_ABSOLUTE_DIFFERENCE_H__
+#define __ARM_COMPUTE_TEST_ABSOLUTE_DIFFERENCE_H__
 
 #include "tests/SimpleTensor.h"
 #include "tests/validation/Helpers.h"
@@ -36,9 +36,9 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> arithmetic_subtraction(const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
+SimpleTensor<T> absolute_difference(const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, DataType dst_data_type);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_ARITHMETIC_SUBTRACTION_H__ */
+#endif /* __ARM_COMPUTE_TEST_ABSOLUTE_DIFFERENCE_H__ */
diff --git a/tests/validation/reference/Accumulate.cpp b/tests/validation/reference/Accumulate.cpp
new file mode 100644
index 0000000..29a2007
--- /dev/null
+++ b/tests/validation/reference/Accumulate.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Accumulate.h"
+
+#include "arm_compute/core/Types.h"
+#include "tests/validation/FixedPoint.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T1, typename T2>
+SimpleTensor<T2> accumulate(const SimpleTensor<T1> &src, DataType output_data_type)
+{
+    SimpleTensor<T2> dst{ src.shape(), output_data_type };
+
+    library->fill_tensor_uniform(dst, 1, static_cast<T2>(0), static_cast<T2>(std::numeric_limits<T1>::max()));
+
+    using intermediate_type = typename common_promoted_signed_type<T1, T2>::intermediate_type;
+
+    for(int i = 0; i < src.num_elements(); ++i)
+    {
+        intermediate_type val = static_cast<intermediate_type>(src[i]) + static_cast<intermediate_type>(dst[i]);
+        dst[i]                = saturate_cast<T2>(val);
+    }
+
+    return dst;
+}
+
+template <typename T1, typename T2>
+SimpleTensor<T2> accumulate_weighted(const SimpleTensor<T1> &src, float alpha, DataType output_data_type)
+{
+    ARM_COMPUTE_ERROR_ON_MSG(alpha < 0.f || alpha > 1.f, "Weight (alpha) specified in accumulate_weighted must be within the range [0, 1]");
+
+    SimpleTensor<T2> dst{ src.shape(), output_data_type };
+
+    library->fill_tensor_uniform(dst, 1, static_cast<T2>(0), static_cast<T2>(std::numeric_limits<T1>::max()));
+
+    using intermediate_type = typename common_promoted_signed_type<T1, T2>::intermediate_type;
+
+    for(int i = 0; i < src.num_elements(); ++i)
+    {
+        double val = (1. - static_cast<double>(alpha)) * static_cast<intermediate_type>(dst[i]) + static_cast<double>(alpha) * static_cast<intermediate_type>(src[i]);
+        dst[i]     = static_cast<T2>(val);
+    }
+
+    return dst;
+}
+
+template <typename T1, typename T2>
+SimpleTensor<T2> accumulate_squared(const SimpleTensor<T1> &src, uint32_t shift, DataType output_data_type)
+{
+    ARM_COMPUTE_ERROR_ON_MSG(shift > 15, "Shift in accumulate_squared must be within the range [0, 15]");
+
+    SimpleTensor<T2> dst{ src.shape(), output_data_type };
+
+    library->fill_tensor_uniform(dst, 1, static_cast<T2>(0), static_cast<T2>(std::numeric_limits<T1>::max()));
+
+    using intermediate_type = typename common_promoted_signed_type<T1, T2>::intermediate_type;
+    intermediate_type denom = 1 << shift;
+
+    for(int i = 0; i < src.num_elements(); ++i)
+    {
+        intermediate_type val = static_cast<intermediate_type>(dst[i]) + (static_cast<intermediate_type>(src[i]) * static_cast<intermediate_type>(src[i]) / denom);
+        dst[i]                = saturate_cast<T2>(val);
+    }
+
+    return dst;
+}
+
+template SimpleTensor<int16_t> accumulate(const SimpleTensor<uint8_t> &src, DataType output_data_type);
+template SimpleTensor<uint8_t> accumulate_weighted(const SimpleTensor<uint8_t> &src, float alpha, DataType output_data_type);
+template SimpleTensor<int16_t> accumulate_squared(const SimpleTensor<uint8_t> &src, uint32_t shift, DataType output_data_type);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CPP/ArithmeticSubtraction.h b/tests/validation/reference/Accumulate.h
similarity index 71%
copy from tests/validation/CPP/ArithmeticSubtraction.h
copy to tests/validation/reference/Accumulate.h
index 18b0d12..faa570b 100644
--- a/tests/validation/CPP/ArithmeticSubtraction.h
+++ b/tests/validation/reference/Accumulate.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_ARITHMETIC_SUBTRACTION_H__
-#define __ARM_COMPUTE_TEST_ARITHMETIC_SUBTRACTION_H__
+#ifndef __ARM_COMPUTE_TEST_ACCUMULATE_H__
+#define __ARM_COMPUTE_TEST_ACCUMULATE_H__
 
 #include "tests/SimpleTensor.h"
 #include "tests/validation/Helpers.h"
@@ -35,10 +35,16 @@
 {
 namespace reference
 {
-template <typename T>
-SimpleTensor<T> arithmetic_subtraction(const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
+template <typename T1, typename T2>
+SimpleTensor<T2> accumulate(const SimpleTensor<T1> &src, DataType output_data_type);
+
+template <typename T1, typename T2>
+SimpleTensor<T2> accumulate_weighted(const SimpleTensor<T1> &src, float alpha, DataType output_data_type);
+
+template <typename T1, typename T2>
+SimpleTensor<T2> accumulate_squared(const SimpleTensor<T1> &src, uint32_t shift, DataType output_data_type);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_ARITHMETIC_SUBTRACTION_H__ */
+#endif /* __ARM_COMPUTE_TEST_ACCUMULATE_H__ */
diff --git a/tests/validation/CPP/ActivationLayer.cpp b/tests/validation/reference/ActivationLayer.cpp
similarity index 94%
rename from tests/validation/CPP/ActivationLayer.cpp
rename to tests/validation/reference/ActivationLayer.cpp
index 2243e6f..df7f653 100644
--- a/tests/validation/CPP/ActivationLayer.cpp
+++ b/tests/validation/reference/ActivationLayer.cpp
@@ -154,6 +154,15 @@
     return dst;
 }
 
+template <>
+SimpleTensor<uint8_t> activation_layer<uint8_t>(const SimpleTensor<uint8_t> &src, ActivationLayerInfo info)
+{
+    SimpleTensor<float>   src_tmp = convert_from_asymmetric(src);
+    SimpleTensor<float>   dst_tmp = activation_layer<float>(src_tmp, info);
+    SimpleTensor<uint8_t> dst     = convert_to_asymmetric(dst_tmp, src.quantization_info());
+    return dst;
+}
+
 template SimpleTensor<float> activation_layer(const SimpleTensor<float> &src, ActivationLayerInfo info);
 template SimpleTensor<half> activation_layer(const SimpleTensor<half> &src, ActivationLayerInfo info);
 template SimpleTensor<qint8_t> activation_layer(const SimpleTensor<qint8_t> &src, ActivationLayerInfo info);
diff --git a/tests/validation/CPP/ActivationLayer.h b/tests/validation/reference/ActivationLayer.h
similarity index 100%
rename from tests/validation/CPP/ActivationLayer.h
rename to tests/validation/reference/ActivationLayer.h
diff --git a/tests/validation/CPP/ArithmeticAddition.cpp b/tests/validation/reference/ArithmeticAddition.cpp
similarity index 100%
rename from tests/validation/CPP/ArithmeticAddition.cpp
rename to tests/validation/reference/ArithmeticAddition.cpp
diff --git a/tests/validation/CPP/ArithmeticAddition.h b/tests/validation/reference/ArithmeticAddition.h
similarity index 100%
rename from tests/validation/CPP/ArithmeticAddition.h
rename to tests/validation/reference/ArithmeticAddition.h
diff --git a/tests/validation/CPP/ArithmeticSubtraction.cpp b/tests/validation/reference/ArithmeticSubtraction.cpp
similarity index 73%
rename from tests/validation/CPP/ArithmeticSubtraction.cpp
rename to tests/validation/reference/ArithmeticSubtraction.cpp
index 80bdb15..bed2d37 100644
--- a/tests/validation/CPP/ArithmeticSubtraction.cpp
+++ b/tests/validation/reference/ArithmeticSubtraction.cpp
@@ -34,23 +34,26 @@
 {
 namespace reference
 {
-template <typename T>
-SimpleTensor<T> arithmetic_subtraction(const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, DataType dst_data_type, ConvertPolicy convert_policy)
+template <typename T1, typename T2, typename T3>
+SimpleTensor<T3> arithmetic_subtraction(const SimpleTensor<T1> &src1, const SimpleTensor<T2> &src2, DataType dst_data_type, ConvertPolicy convert_policy)
 {
-    SimpleTensor<T> result(src1.shape(), dst_data_type);
+    SimpleTensor<T3> result(src1.shape(), dst_data_type);
 
-    using intermediate_type = typename common_promoted_signed_type<T>::intermediate_type;
+    using intermediate_type = typename common_promoted_signed_type<typename std::conditional<sizeof(T1) >= sizeof(T2), T1, T2>::type >::intermediate_type;
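+    // Promote through the wider of the two input types so mixed U8/S16 inputs do not overflow before the cast to T3.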
 
     for(int i = 0; i < src1.num_elements(); ++i)
     {
         intermediate_type val = static_cast<intermediate_type>(src1[i]) - static_cast<intermediate_type>(src2[i]);
-        result[i]             = (convert_policy == ConvertPolicy::SATURATE) ? saturate_cast<T>(val) : static_cast<T>(val);
+        result[i]             = (convert_policy == ConvertPolicy::SATURATE) ? saturate_cast<T3>(val) : static_cast<T3>(val);
     }
 
     return result;
 }
 
 template SimpleTensor<uint8_t> arithmetic_subtraction(const SimpleTensor<uint8_t> &src1, const SimpleTensor<uint8_t> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
+template SimpleTensor<int16_t> arithmetic_subtraction(const SimpleTensor<uint8_t> &src1, const SimpleTensor<uint8_t> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
+template SimpleTensor<int16_t> arithmetic_subtraction(const SimpleTensor<uint8_t> &src1, const SimpleTensor<int16_t> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
+template SimpleTensor<int16_t> arithmetic_subtraction(const SimpleTensor<int16_t> &src1, const SimpleTensor<uint8_t> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
 template SimpleTensor<int16_t> arithmetic_subtraction(const SimpleTensor<int16_t> &src1, const SimpleTensor<int16_t> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
 template SimpleTensor<int8_t> arithmetic_subtraction(const SimpleTensor<int8_t> &src1, const SimpleTensor<int8_t> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
 template SimpleTensor<half> arithmetic_subtraction(const SimpleTensor<half> &src1, const SimpleTensor<half> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
diff --git a/tests/validation/CPP/ArithmeticSubtraction.h b/tests/validation/reference/ArithmeticSubtraction.h
similarity index 88%
rename from tests/validation/CPP/ArithmeticSubtraction.h
rename to tests/validation/reference/ArithmeticSubtraction.h
index 18b0d12..9308314 100644
--- a/tests/validation/CPP/ArithmeticSubtraction.h
+++ b/tests/validation/reference/ArithmeticSubtraction.h
@@ -35,8 +35,8 @@
 {
 namespace reference
 {
-template <typename T>
-SimpleTensor<T> arithmetic_subtraction(const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
+template <typename T1, typename T2, typename T3>
+SimpleTensor<T3> arithmetic_subtraction(const SimpleTensor<T1> &src1, const SimpleTensor<T2> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/CPP/BatchNormalizationLayer.cpp b/tests/validation/reference/BatchNormalizationLayer.cpp
similarity index 97%
rename from tests/validation/CPP/BatchNormalizationLayer.cpp
rename to tests/validation/reference/BatchNormalizationLayer.cpp
index 37e2d55..e4446d1 100644
--- a/tests/validation/CPP/BatchNormalizationLayer.cpp
+++ b/tests/validation/reference/BatchNormalizationLayer.cpp
@@ -44,7 +44,7 @@
     const auto cols       = static_cast<int>(src.shape()[0]);
     const auto rows       = static_cast<int>(src.shape()[1]);
     const auto depth      = static_cast<int>(src.shape()[2]);
-    int        upper_dims = src.shape().total_size() / (cols * rows * depth);
+    const int  upper_dims = src.shape().total_size() / (cols * rows * depth);
 
     for(int r = 0; r < upper_dims; ++r)
     {
@@ -88,7 +88,7 @@
     const auto cols       = static_cast<int>(src.shape()[0]);
     const auto rows       = static_cast<int>(src.shape()[1]);
     const auto depth      = static_cast<int>(src.shape()[2]);
-    int        upper_dims = src.shape().total_size() / (cols * rows * depth);
+    const int  upper_dims = src.shape().total_size() / (cols * rows * depth);
 
     for(int r = 0; r < upper_dims; ++r)
     {
diff --git a/tests/validation/CPP/BatchNormalizationLayer.h b/tests/validation/reference/BatchNormalizationLayer.h
similarity index 100%
rename from tests/validation/CPP/BatchNormalizationLayer.h
rename to tests/validation/reference/BatchNormalizationLayer.h
diff --git a/tests/validation/CPP/BitwiseAnd.cpp b/tests/validation/reference/BitwiseAnd.cpp
similarity index 100%
rename from tests/validation/CPP/BitwiseAnd.cpp
rename to tests/validation/reference/BitwiseAnd.cpp
diff --git a/tests/validation/CPP/BitwiseAnd.h b/tests/validation/reference/BitwiseAnd.h
similarity index 100%
rename from tests/validation/CPP/BitwiseAnd.h
rename to tests/validation/reference/BitwiseAnd.h
diff --git a/tests/validation/CPP/BitwiseNot.cpp b/tests/validation/reference/BitwiseNot.cpp
similarity index 100%
rename from tests/validation/CPP/BitwiseNot.cpp
rename to tests/validation/reference/BitwiseNot.cpp
diff --git a/tests/validation/CPP/BitwiseNot.h b/tests/validation/reference/BitwiseNot.h
similarity index 100%
rename from tests/validation/CPP/BitwiseNot.h
rename to tests/validation/reference/BitwiseNot.h
diff --git a/tests/validation/CPP/BitwiseOr.cpp b/tests/validation/reference/BitwiseOr.cpp
similarity index 100%
rename from tests/validation/CPP/BitwiseOr.cpp
rename to tests/validation/reference/BitwiseOr.cpp
diff --git a/tests/validation/CPP/BitwiseOr.h b/tests/validation/reference/BitwiseOr.h
similarity index 100%
rename from tests/validation/CPP/BitwiseOr.h
rename to tests/validation/reference/BitwiseOr.h
diff --git a/tests/validation/CPP/BitwiseXor.cpp b/tests/validation/reference/BitwiseXor.cpp
similarity index 100%
rename from tests/validation/CPP/BitwiseXor.cpp
rename to tests/validation/reference/BitwiseXor.cpp
diff --git a/tests/validation/CPP/BitwiseXor.h b/tests/validation/reference/BitwiseXor.h
similarity index 100%
rename from tests/validation/CPP/BitwiseXor.h
rename to tests/validation/reference/BitwiseXor.h
diff --git a/tests/validation/CPP/Box3x3.cpp b/tests/validation/reference/Box3x3.cpp
similarity index 100%
rename from tests/validation/CPP/Box3x3.cpp
rename to tests/validation/reference/Box3x3.cpp
diff --git a/tests/validation/CPP/Box3x3.h b/tests/validation/reference/Box3x3.h
similarity index 100%
rename from tests/validation/CPP/Box3x3.h
rename to tests/validation/reference/Box3x3.h
diff --git a/tests/validation/reference/ConvolutionLayer.cpp b/tests/validation/reference/ConvolutionLayer.cpp
new file mode 100644
index 0000000..1066411
--- /dev/null
+++ b/tests/validation/reference/ConvolutionLayer.cpp
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ConvolutionLayer.h"
+
+#include "tests/validation/FixedPoint.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/Utils.h"
+#include "tests/validation/reference/UtilsQuantizedAsymm.h"
+
+#include "tests/framework/Asserts.h"
+
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+inline bool is_valid_pixel(int i, int min, int max)
+{
+    return (i >= min && i < max);
+}
+
+// 3D convolution for floating point type
+template < typename T, typename TB, typename std::enable_if < is_floating_point<T>::value &&is_floating_point<TB>::value, int >::type = 0 >
+void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &out,
+                   int i_offset, int w_offset, int b_offset, int o_offset,
+                   int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights)
+{
+    const T  *in_ptr  = in.data() + i_offset;
+    const T  *w_ptr   = weights.data() + w_offset;
+    const TB *b_ptr   = bias.data() + b_offset;
+    T        *out_ptr = out.data() + o_offset;
+
+    const int half_width_weights  = width_weights / 2;
+    const int half_height_weights = height_weights / 2;
+
+    // Reset accumulator
+    T acc(0);
+
+    // Compute a 2D convolution for each IFM and accumulate the result
+    for(int ifm = 0; ifm < depth_in; ++ifm)
+    {
+        // Compute the offset for the input slice
+        const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in;
+
+        // Compute 2D convolution
+        for(int yk = -half_height_weights; yk <= half_height_weights; ++yk)
+        {
+            for(int xk = -half_width_weights; xk <= half_width_weights; ++xk)
+            {
+                // Check that the pixel is within the input bounds
+                if(is_valid_pixel(xi + xk, 0, width_in) && is_valid_pixel(yi + yk, 0, height_in))
+                {
+                    const int idx = xk + half_width_weights;
+                    const int idy = yk + half_height_weights;
+
+                    const T i_value = in_ptr[offset_slice_in + xk + yk * width_in];
+                    const T w_value = w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights];
+
+                    acc += i_value * w_value;
+                }
+            }
+        }
+    }
+
+    // Accumulate the bias and store the result
+    *out_ptr = acc + (*b_ptr);
+}
+
+// 3D convolution for fixed point type
+template < typename T, typename TB, typename std::enable_if < std::is_integral<T>::value &&std::is_integral<TB>::value, int >::type = 0 >
+void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &out,
+                   int i_offset, int w_offset, int b_offset, int o_offset,
+                   int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights)
+{
+    const T *in_ptr               = in.data() + i_offset;
+    const T *w_ptr                = weights.data() + w_offset;
+    const T *b_ptr                = bias.data() + b_offset;
+    T       *out_ptr              = out.data() + o_offset;
+    int      fixed_point_position = in.fixed_point_position();
+
+    const int half_width_weights  = width_weights / 2;
+    const int half_height_weights = height_weights / 2;
+
+    using namespace fixed_point_arithmetic;
+    using promoted_type = fixed_point_arithmetic::traits::promote_t<T>;
+
+    // Reset accumulator
+    fixed_point<promoted_type> acc(0, fixed_point_position);
+
+    // Compute a 2D convolution for each IFM and accumulate the result
+    for(int ifm = 0; ifm < depth_in; ++ifm)
+    {
+        // Compute the offset for the input slice
+        const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in;
+
+        // Compute 2D convolution
+        for(int yk = -half_height_weights; yk <= half_height_weights; ++yk)
+        {
+            for(int xk = -half_width_weights; xk <= half_width_weights; ++xk)
+            {
+                // Check that the pixel is within the input bounds
+                if(is_valid_pixel(xi + xk, 0, width_in) && is_valid_pixel(yi + yk, 0, height_in))
+                {
+                    const int idx = xk + half_width_weights;
+                    const int idy = yk + half_height_weights;
+
+                    const fixed_point<promoted_type> i_value(in_ptr[offset_slice_in + xk + yk * width_in], fixed_point_position, true);
+                    const fixed_point<promoted_type> w_value(w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights], fixed_point_position, true);
+                    const fixed_point<promoted_type> iw = i_value * w_value;
+                    acc                                 = iw + acc;
+                }
+            }
+        }
+    }
+
+    // Get the bias
+    const fixed_point<promoted_type> b(*b_ptr, fixed_point_position, true);
+
+    // Accumulate the bias and convert back
+    acc = acc + b;
+    fixed_point<T> res(acc);
+    *out_ptr = res.raw();
+}
+
+// 3D convolution for QASYMM8 type
+template <>
+void convolution3d(const SimpleTensor<uint8_t> &in, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<uint8_t> &out,
+                   int i_offset, int w_offset, int b_offset, int o_offset,
+                   int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights)
+{
+    const uint8_t *in_ptr  = in.data() + i_offset;
+    const uint8_t *w_ptr   = weights.data() + w_offset;
+    const int32_t *b_ptr   = bias.data() + b_offset;
+    uint8_t       *out_ptr = out.data() + o_offset;
+
+    const int   input_offset   = -in.quantization_info().offset;
+    const float input_scale    = in.quantization_info().scale;
+    const int   weights_offset = -weights.quantization_info().offset;
+    const float weights_scale  = weights.quantization_info().scale;
+    const int   output_offset  = out.quantization_info().offset;
+    const float output_scale   = out.quantization_info().scale;
+
+    int         output_multiplier = 0;
+    int         output_shift      = 0;
+    const float multiplier        = input_scale * weights_scale / output_scale;
+    arm_compute::quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
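+    // The float rescale factor input_scale * weights_scale / output_scale is decomposed into a normalized
+    // integer multiplier and a right shift, so the requantization below can be done with integer-only arithmetic.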
+
+    const int half_width_weights  = width_weights / 2;
+    const int half_height_weights = height_weights / 2;
+
+    // Reset accumulator
+    int32_t acc(0);
+
+    // Compute a 2D convolution for each IFM and accumulate the result
+    for(int ifm = 0; ifm < depth_in; ++ifm)
+    {
+        // Compute the offset for the input slice
+        const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in;
+
+        // Compute 2D convolution
+        for(int yk = -half_height_weights; yk <= half_height_weights; ++yk)
+        {
+            for(int xk = -half_width_weights; xk <= half_width_weights; ++xk)
+            {
+                // Check that the pixel is within the input bounds
+                if(is_valid_pixel(xi + xk, 0, width_in) && is_valid_pixel(yi + yk, 0, height_in))
+                {
+                    const int idx = xk + half_width_weights;
+                    const int idy = yk + half_height_weights;
+
+                    const uint8_t i_value = in_ptr[offset_slice_in + xk + yk * width_in];
+                    const uint8_t w_value = w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights];
+
+                    acc += (i_value + input_offset) * (w_value + weights_offset);
+                }
+            }
+        }
+    }
+
+    // Accumulate the bias
+    acc += (*b_ptr);
+
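+    // Requantize: fixed-point multiply by output_multiplier, rounding right shift by output_shift,
+    // then add the output offset and clamp to the QASYMM8 range [0, 255].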
+    acc = asymm_rounding_divide_by_pow2(asymm_int_mult(acc, output_multiplier), output_shift);
+    acc += output_offset;
+    acc = clamp<int32_t>(acc, 0, 255);
+
+    // Store the result
+    *out_ptr = acc;
+}
+} // namespace
+
+template <typename T, typename TB>
+SimpleTensor<T> convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape, const PadStrideInfo &info)
+{
+    // Create reference
+    SimpleTensor<T> dst{ output_shape, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() };
+
+    // Compute reference
+    const int width_in       = src.shape().x();
+    const int height_in      = src.shape().y();
+    const int depth_in       = src.shape().z();
+    const int width_out      = dst.shape().x();
+    const int height_out     = dst.shape().y();
+    const int depth_out      = dst.shape().z();
+    const int width_weights  = weights.shape().x();
+    const int height_weights = weights.shape().y();
+    const int depth_weights  = weights.shape().z();
+    const int pad_left       = std::min(static_cast<int>(info.pad_left()), width_weights / 2);
+    const int pad_top        = std::min(static_cast<int>(info.pad_top()), height_weights / 2);
+    const int pad_right      = std::min(static_cast<int>(info.pad_right()), width_weights / 2);
+    const int pad_bottom     = std::min(static_cast<int>(info.pad_bottom()), height_weights / 2);
+
+    const int start_xi    = width_weights / 2 - pad_left;
+    const int start_yi    = height_weights / 2 - pad_top;
+    const int end_xi      = width_in + pad_left - width_weights / 2 + pad_right - width_weights / 2;
+    const int end_yi      = height_in + pad_top - height_weights / 2 + pad_bottom - height_weights / 2;
+    const int stride_xi   = info.stride().first;
+    const int stride_yi   = info.stride().second;
+    const int num_batches = src.shape().total_size() / (width_in * height_in * depth_in);
+
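+    // The loops below place the kernel centre at every valid (xi, yi) input position (with the padding clipped
+    // to half the kernel size) and map it to output position (xo, yo) = ((xi - start_xi) / stride_xi, (yi - start_yi) / stride_yi).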
+    for(int r = 0; r < num_batches; ++r)
+    {
+        for(int yi = start_yi; yi < start_yi + end_yi; yi += stride_yi)
+        {
+            for(int xi = start_xi; xi < start_xi + end_xi; xi += stride_xi)
+            {
+                for(int ofm = 0; ofm < depth_out; ++ofm)
+                {
+                    // Compute input and output offsets
+                    const int offset_in  = r * width_in * height_in * depth_in;
+                    const int xo         = (xi - start_xi) / stride_xi;
+                    const int yo         = (yi - start_yi) / stride_yi;
+                    const int offset_out = xo + yo * width_out + ofm * width_out * height_out + r * width_out * height_out * depth_out;
+
+                    ARM_COMPUTE_ASSERT(xo < width_out);
+                    ARM_COMPUTE_ASSERT(yo < height_out);
+
+                    // Compute 3D convolution
+                    convolution3d(src, weights, bias, dst,
+                                  offset_in, ofm * width_weights * height_weights * depth_weights, ofm, offset_out,
+                                  xi, yi,
+                                  width_in, height_in, depth_in,
+                                  width_weights, height_weights);
+                }
+            }
+        }
+    }
+
+    return dst;
+}
+
+template SimpleTensor<float> convolution_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, const TensorShape &output_shape,
+                                               const PadStrideInfo &info);
+template SimpleTensor<half> convolution_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, const TensorShape &output_shape,
+                                              const PadStrideInfo &info);
+template SimpleTensor<qint8_t> convolution_layer(const SimpleTensor<qint8_t> &src, const SimpleTensor<qint8_t> &weights, const SimpleTensor<qint8_t> &bias, const TensorShape &output_shape,
+                                                 const PadStrideInfo &info);
+template SimpleTensor<qint16_t> convolution_layer(const SimpleTensor<qint16_t> &src, const SimpleTensor<qint16_t> &weights, const SimpleTensor<qint16_t> &bias, const TensorShape &output_shape,
+                                                  const PadStrideInfo &info);
+template SimpleTensor<uint8_t> convolution_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, const TensorShape &output_shape,
+                                                 const PadStrideInfo &info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CPP/ConvolutionLayer.h b/tests/validation/reference/ConvolutionLayer.h
similarity index 90%
rename from tests/validation/CPP/ConvolutionLayer.h
rename to tests/validation/reference/ConvolutionLayer.h
index 117e846..57455ba 100644
--- a/tests/validation/CPP/ConvolutionLayer.h
+++ b/tests/validation/reference/ConvolutionLayer.h
@@ -35,8 +35,8 @@
 {
 namespace reference
 {
-template <typename T>
-SimpleTensor<T> convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<T> &bias, const TensorShape &output_shape, const PadStrideInfo &info);
+template <typename T, typename TB>
+SimpleTensor<T> convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape, const PadStrideInfo &info);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/DeconvolutionLayer.cpp b/tests/validation/reference/DeconvolutionLayer.cpp
new file mode 100644
index 0000000..82c2188
--- /dev/null
+++ b/tests/validation/reference/DeconvolutionLayer.cpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ConvolutionLayer.h"
+
+#include "tests/validation/FixedPoint.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<T> &bias, const TensorShape &output_shape,
+                                    const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> &a)
+{
+    // Create reference
+    TensorShape scaled_shape = src.shape();
+    scaled_shape.set(0, output_shape.x());
+    scaled_shape.set(1, output_shape.y());
+    SimpleTensor<T> scaled{ scaled_shape, src.data_type(), 1, src.fixed_point_position() };
+
+    const int          width_in      = src.shape().x();
+    const int          height_in     = src.shape().y();
+    const int          width_scaled  = scaled.shape().x();
+    const int          height_scaled = scaled.shape().y();
+    const int          num_2d_slices = src.shape().total_size() / (width_in * height_in);
+    const float        width_ratio   = static_cast<float>(width_in) / static_cast<float>(width_scaled);
+    const float        height_ratio  = static_cast<float>(height_in) / static_cast<float>(height_scaled);
+    const int          ax            = a.first;  // The number of zeros added to the right edge of the input.
+    const int          ay            = a.second; // The number of zeros added to the bottom edge of the input.
+    const unsigned int kernel_size   = weights.shape().x();
+    ARM_COMPUTE_ERROR_ON(info.pad().first > (kernel_size - 1));
+    const int transposed_convolution_padx = kernel_size - info.pad().first - 1;
+    const int transposed_convolution_pady = kernel_size - info.pad().second - 1;
+    const int stridex                     = info.stride().first;
+    const int stridey                     = info.stride().second;
+    for(int j = 0; j < scaled.num_elements(); ++j)
+    {
+        scaled[j] = T(0);
+    }
+
+    for(int slice = 0; slice < num_2d_slices; ++slice)
+    {
+        const int offset_slice_in  = slice * width_in * height_in;
+        const int offset_slice_out = slice * width_scaled * height_scaled;
+        for(int yi = ay; yi < height_scaled; yi += stridey)
+        {
+            for(int xi = transposed_convolution_padx; xi < width_scaled; xi += stridex)
+            {
+                const float x_src     = (xi + 0.5f) * width_ratio - 0.5f;
+                const float y_src     = (yi + 0.5f) * height_ratio - 0.5f;
+                T          *out       = scaled.data() + offset_slice_out + xi + yi * width_scaled;
+                const bool  in_bounds = x_src > -1 && y_src > -1 && x_src < width_in && y_src < height_in;
+                const bool  in_axy    = xi < transposed_convolution_padx || xi >= (width_scaled - ax)  // true when the x coordinate falls in the padded left/right area
+                                        || yi < ay || yi >= (height_scaled - transposed_convolution_pady); // same check for the top and bottom padding in the upscaled XY plane
+                if(!in_axy)
+                {
+                    if(in_bounds)
+                    {
+                        const int in_scaled_x = (x_src < 0.f) ? static_cast<int>(x_src - 0.5f) : static_cast<int>(x_src + 0.5f);
+                        const int in_scaled_y = (y_src < 0.f) ? static_cast<int>(y_src - 0.5f) : static_cast<int>(y_src + 0.5f);
+                        const T *in          = src.data() + offset_slice_in + in_scaled_x + in_scaled_y * width_in;
+                        *out                  = *in;
+                    }
+                    else
+                    {
+                        *out = T(0);
+                    }
+                }
+            }
+        }
+    }
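+    // The transposed convolution is modelled as nearest-neighbour upsampling of the input into `scaled`
+    // (zeros in the inserted and padded positions) followed by a standard stride-1 convolution.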
+    const PadStrideInfo conv_info(1, 1, 1, 1, DimensionRoundingType::CEIL);
+    return convolution_layer(scaled, weights, bias, output_shape, conv_info);
+}
+
+template SimpleTensor<float> deconvolution_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, const TensorShape &output_shape,
+                                                 const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> &a);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/DeconvolutionLayer.h b/tests/validation/reference/DeconvolutionLayer.h
new file mode 100644
index 0000000..8222e32
--- /dev/null
+++ b/tests/validation/reference/DeconvolutionLayer.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_DECONVOLUTION_LAYER_H__
+#define __ARM_COMPUTE_TEST_DECONVOLUTION_LAYER_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+/** Deconvolution reference implementation.
+ *
+ * src              Input tensor. The 3 lower dimensions represent a single input, with an optional 4th dimension for a batch of inputs. Data types supported: F32.
+ * weights          The 4D weights with dimensions [width, height, OFM, IFM]. Data type supported: Same as @p src.
+ * bias             Optional, ignored if NULL. The biases have one dimension. Data type supported: Same as @p src.
+ * output_shape     Output tensor shape. The output has the same number of dimensions as @p src.
+ * info             Contains the padding and stride policies to be used in the deconvolution, as described in @ref PadStrideInfo.
+ * a                The number of zeros added to the right and bottom edges of the input, as a (right, bottom) pair.
+ *
+ */
+template <typename T>
+SimpleTensor<T> deconvolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<T> &bias, const TensorShape &output_shape, const PadStrideInfo &info,
+                                    const std::pair<unsigned int, unsigned int> &a);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_DECONVOLUTION_LAYER_H__ */
diff --git a/tests/validation/CPP/DepthConcatenateLayer.cpp b/tests/validation/reference/DepthConcatenateLayer.cpp
similarity index 100%
rename from tests/validation/CPP/DepthConcatenateLayer.cpp
rename to tests/validation/reference/DepthConcatenateLayer.cpp
diff --git a/tests/validation/CPP/DepthConcatenateLayer.h b/tests/validation/reference/DepthConcatenateLayer.h
similarity index 100%
rename from tests/validation/CPP/DepthConcatenateLayer.h
rename to tests/validation/reference/DepthConcatenateLayer.h
diff --git a/tests/validation/CPP/DepthConvert.cpp b/tests/validation/reference/DepthConvertLayer.cpp
similarity index 98%
rename from tests/validation/CPP/DepthConvert.cpp
rename to tests/validation/reference/DepthConvertLayer.cpp
index 110174a..dd095b8 100644
--- a/tests/validation/CPP/DepthConvert.cpp
+++ b/tests/validation/reference/DepthConvertLayer.cpp
@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "DepthConvert.h"
+#include "DepthConvertLayer.h"
 
 #include "tests/validation/FixedPoint.h"
 #include "tests/validation/Helpers.h"
diff --git a/tests/validation/CPP/DepthConvert.h b/tests/validation/reference/DepthConvertLayer.h
similarity index 100%
rename from tests/validation/CPP/DepthConvert.h
rename to tests/validation/reference/DepthConvertLayer.h
diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.cpp b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
new file mode 100644
index 0000000..0e88d3d
--- /dev/null
+++ b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "DepthwiseConvolutionLayer.h"
+
+#include "ConvolutionLayer.h"
+#include "Utils.h"
+
+#include "tests/validation/FixedPoint.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/Utils.h"
+#include "tests/validation/reference/UtilsQuantizedAsymm.h"
+
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+/** Perform a depthwise convolution
+ *
+ * - Three-dimensional tensors
+ * - Third dimension is the number of channels
+ * - Depths of input tensor and filter are equal
+ * - Padding, stride and output shape "match"
+ *
+ */
+template <typename T, typename TB>
+SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info)
+{
+    // Create reference
+    SimpleTensor<T> dst{ dst_shape, src.data_type(), 1, src.fixed_point_position() };
+
+    // Compute reference
+    const int filter_width  = weights.shape().x();
+    const int filter_height = weights.shape().y();
+    const int filter_plane  = filter_width * filter_height;
+    const int input_width   = src.shape().x();
+    const int input_height  = src.shape().y();
+    const int input_depth   = src.shape().z();
+    const int num_batches   = src.shape().total_size() / (input_width * input_height * input_depth);
+
+    const int filter_half_width  = filter_width / 2;
+    const int filter_half_height = filter_height / 2;
+
+    const int pad_left   = std::min(static_cast<int>(conv_info.pad_left()), filter_half_width);
+    const int pad_top    = std::min(static_cast<int>(conv_info.pad_top()), filter_half_height);
+    const int pad_right  = std::min(static_cast<int>(conv_info.pad_right()), filter_half_width);
+    const int pad_bottom = std::min(static_cast<int>(conv_info.pad_bottom()), filter_half_height);
+
+    const int minimum_x = -pad_left + filter_half_width;
+    const int minimum_y = -pad_top + filter_half_height;
+    const int maximum_x = input_width + pad_left - filter_half_width + pad_right - filter_half_width;
+    const int maximum_y = input_height + pad_top - filter_half_height + pad_bottom - filter_half_height;
+
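+    // Slide the filter centre over [minimum, minimum + maximum) in x and y; taps that fall outside the
+    // input are read as zero through tensor_elem_at with a constant border.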
+    int out_pos = 0;
+    for(int r = 0; r < num_batches; ++r)
+    {
+        for(int z = 0; z < input_depth; ++z)
+        {
+            for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second)
+            {
+                for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first)
+                {
+                    Coordinates coords(static_cast<int>(x), static_cast<int>(y), static_cast<int>(z), static_cast<int>(r));
+                    size_t      filter_offset = filter_plane * z;
+
+                    T val = 0;
+                    for(int j = y - filter_half_height; j <= static_cast<int>(y + filter_half_height); ++j)
+                    {
+                        for(int i = x - filter_half_width; i <= static_cast<int>(x + filter_half_width); ++i)
+                        {
+                            coords.set(0, i);
+                            coords.set(1, j);
+                            val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, 0.f);
+                            ++filter_offset;
+                        }
+                    }
+                    coords.set(0, x);
+                    coords.set(1, y);
+                    dst[out_pos++] = saturate_cast<T>(val + *static_cast<const TB *>(biases(Coordinates(z))));
+                }
+            }
+        }
+    }
+
+    return dst;
+}
+
+template <>
+SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
+                                            const PadStrideInfo &conv_info)
+{
+    // Create reference
+    SimpleTensor<uint8_t> dst{ dst_shape, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() };
+
+    const int   input_offset   = -src.quantization_info().offset;
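+    // Compute |src1 - src2| in the promoted signed intermediate type, then saturate back to T.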
+    const float input_scale    = src.quantization_info().scale;
+    const int   weights_offset = -weights.quantization_info().offset;
+    const float weights_scale  = weights.quantization_info().scale;
+    const int   output_offset  = dst.quantization_info().offset;
+    const float output_scale   = dst.quantization_info().scale;
+
+    int         output_multiplier;
+    int         output_shift;
+    const float multiplier = input_scale * weights_scale / output_scale;
+    arm_compute::quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
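+    // Same requantization scheme as the quantized convolution reference: the float rescale factor is
+    // split into an integer multiplier and a right shift applied after accumulation.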
+
+    // Compute reference
+    const int filter_width  = weights.shape().x();
+    const int filter_height = weights.shape().y();
+    const int filter_plane  = filter_width * filter_height;
+    const int input_width   = src.shape().x();
+    const int input_height  = src.shape().y();
+    const int input_depth   = src.shape().z();
+    const int num_batches   = src.shape().total_size() / (input_width * input_height * input_depth);
+
+    const int filter_half_size = filter_width / 2;
+    const int pad_x            = std::min(filter_half_size, static_cast<int>(conv_info.pad().first));
+    const int pad_y            = std::min(filter_half_size, static_cast<int>(conv_info.pad().second));
+    const int minimum_x        = -pad_x + filter_half_size;
+    const int minimum_y        = -pad_y + filter_half_size;
+
+    int out_pos = 0;
+    for(int r = 0; r < num_batches; ++r)
+    {
+        for(int z = 0; z < input_depth; ++z)
+        {
+            int32_t bias_val = *static_cast<const int32_t *>(biases(Coordinates(z)));
+            for(int y = minimum_y; y < input_height + pad_y - filter_half_size; y += conv_info.stride().second)
+            {
+                for(int x = minimum_x; x < input_width + pad_x - filter_half_size; x += conv_info.stride().first)
+                {
+                    Coordinates coords(x, y, z);
+                    int         filter_offset = filter_plane * z;
+
+                    uint32_t val = 0;
+                    for(int j = y - filter_half_size; j <= (y + filter_half_size); ++j)
+                    {
+                        for(int i = x - filter_half_size; i <= (x + filter_half_size); ++i)
+                        {
+                            coords.set(0, i);
+                            coords.set(1, j);
+                            auto    in_val = tensor_elem_at<uint8_t>(src, coords, BorderMode::CONSTANT, 0);
+                            uint8_t w_val  = *(weights.data() + filter_offset);
+                            val += (in_val + input_offset) * (w_val + weights_offset);
+                            ++filter_offset;
+                        }
+                    }
+                    val += bias_val;
+                    val = asymm_rounding_divide_by_pow2(asymm_int_mult(val, output_multiplier), output_shift);
+                    val += output_offset;
+                    val = std::max<int32_t>(val, 0);
+                    val = std::min<int32_t>(val, 255);
+
+                    // Store the result
+                    dst[out_pos++] = val;
+                }
+            }
+        }
+    }
+
+    return dst;
+}
+
+template SimpleTensor<float> depthwise_convolution(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &biases, const TensorShape &dst_shape,
+                                                   const PadStrideInfo &conv_info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CPP/DepthwiseConvolution.h b/tests/validation/reference/DepthwiseConvolutionLayer.h
similarity index 90%
rename from tests/validation/CPP/DepthwiseConvolution.h
rename to tests/validation/reference/DepthwiseConvolutionLayer.h
index 6be80fc..df743a5 100644
--- a/tests/validation/CPP/DepthwiseConvolution.h
+++ b/tests/validation/reference/DepthwiseConvolutionLayer.h
@@ -35,8 +35,8 @@
 {
 namespace reference
 {
-template <typename T>
-SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const TensorShape &dst_shape, const PadStrideInfo &conv_info);
+template <typename T, typename TB>
+SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/CPP/DepthwiseSeparableConvolutionLayer.cpp b/tests/validation/reference/DepthwiseSeparableConvolutionLayer.cpp
similarity index 72%
rename from tests/validation/CPP/DepthwiseSeparableConvolutionLayer.cpp
rename to tests/validation/reference/DepthwiseSeparableConvolutionLayer.cpp
index 3942ecf..ca6c168 100644
--- a/tests/validation/CPP/DepthwiseSeparableConvolutionLayer.cpp
+++ b/tests/validation/reference/DepthwiseSeparableConvolutionLayer.cpp
@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "DepthwiseConvolution.h"
+#include "DepthwiseConvolutionLayer.h"
 
 #include "DepthwiseSeparableConvolutionLayer.h"
 
@@ -40,19 +40,22 @@
 {
 // Depthwise separable convolution layer
 template <typename T>
-SimpleTensor<T> depthwise_separable_convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &depthwise_weights, const TensorShape &depthwise_out_shape,
+SimpleTensor<T> depthwise_separable_convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &depthwise_weights, const SimpleTensor<T> &depthwise_biases,
+                                                      const TensorShape     &depthwise_out_shape,
                                                       const SimpleTensor<T> &pointwise_weights,
-                                                      const SimpleTensor<T> &biases, const TensorShape &dst_shape, const PadStrideInfo &depthwise_conv_info, const PadStrideInfo &pointwise_conv_info)
+                                                      const SimpleTensor<T> &pointwise_biases, const TensorShape &dst_shape, const PadStrideInfo &depthwise_conv_info, const PadStrideInfo &pointwise_conv_info)
 {
     // Compute reference
-    SimpleTensor<T> depthwise_out = depthwise_convolution(src, depthwise_weights, depthwise_out_shape, depthwise_conv_info);
-    SimpleTensor<T> dst           = convolution_layer(depthwise_out, pointwise_weights, biases, dst_shape, pointwise_conv_info);
+    SimpleTensor<T> depthwise_out = depthwise_convolution(src, depthwise_weights, depthwise_biases, depthwise_out_shape, depthwise_conv_info);
+    SimpleTensor<T> dst           = convolution_layer(depthwise_out, pointwise_weights, pointwise_biases, dst_shape, pointwise_conv_info);
 
     return dst;
 }
 
-template SimpleTensor<float> depthwise_separable_convolution_layer(const SimpleTensor<float> &in, const SimpleTensor<float> &depthwise_weights, const TensorShape &depthwise_out_shape,
-                                                                   const SimpleTensor<float> &pointwise_weights, const SimpleTensor<float> &biases, const TensorShape &dst_shape, const PadStrideInfo &depthwise_conv_info, const PadStrideInfo &pointwise_conv_info);
+template SimpleTensor<float> depthwise_separable_convolution_layer(const SimpleTensor<float> &in, const SimpleTensor<float> &depthwise_weights, const SimpleTensor<float> &depthwise_biases,
+                                                                   const TensorShape         &depthwise_out_shape,
+                                                                   const SimpleTensor<float> &pointwise_weights, const SimpleTensor<float> &pointwise_biases, const TensorShape &dst_shape, const PadStrideInfo &depthwise_conv_info,
+                                                                   const PadStrideInfo &pointwise_conv_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/CPP/DepthwiseSeparableConvolutionLayer.h b/tests/validation/reference/DepthwiseSeparableConvolutionLayer.h
similarity index 82%
rename from tests/validation/CPP/DepthwiseSeparableConvolutionLayer.h
rename to tests/validation/reference/DepthwiseSeparableConvolutionLayer.h
index 71cd013..0fcce2c 100644
--- a/tests/validation/CPP/DepthwiseSeparableConvolutionLayer.h
+++ b/tests/validation/reference/DepthwiseSeparableConvolutionLayer.h
@@ -36,9 +36,10 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> depthwise_separable_convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &depthwise_weights, const TensorShape &depthwise_out_shape,
-                                                      const SimpleTensor<T> &pointwise_weights,
-                                                      const SimpleTensor<T> &biases, const TensorShape &dst_shape, const PadStrideInfo &depthwise_conv_info, const PadStrideInfo &pointwise_conv_info);
+SimpleTensor<T> depthwise_separable_convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &depthwise_weights, const SimpleTensor<T> &depthwise_biases,
+                                                      const TensorShape     &depthwise_out_shape,
+                                                      const SimpleTensor<T> &pointwise_weights, const SimpleTensor<T> &pointwise_biases, const TensorShape &dst_shape,
+                                                      const PadStrideInfo &depthwise_conv_info, const PadStrideInfo &pointwise_conv_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/CPP/DequantizationLayer.cpp b/tests/validation/reference/DequantizationLayer.cpp
similarity index 100%
rename from tests/validation/CPP/DequantizationLayer.cpp
rename to tests/validation/reference/DequantizationLayer.cpp
diff --git a/tests/validation/CPP/DequantizationLayer.h b/tests/validation/reference/DequantizationLayer.h
similarity index 100%
rename from tests/validation/CPP/DequantizationLayer.h
rename to tests/validation/reference/DequantizationLayer.h
diff --git a/tests/validation/reference/Dilate.cpp b/tests/validation/reference/Dilate.cpp
new file mode 100644
index 0000000..0683a0a
--- /dev/null
+++ b/tests/validation/reference/Dilate.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Dilate.h"
+
+#include "Utils.h"
+#include "tests/validation/Helpers.h"
+
+#include <algorithm>
+#include <array>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> dilate(const SimpleTensor<T> &src, BorderMode border_mode, T constant_border_value)
+{
+    /*
+             -1   x  +1
+         -1 [tl][tc][tr] -1
+          y [ml][xy][mr]  y
+         +1 [bl][bc][br] +1
+             -1   x  +1
+        dilate:
+        dst(x, y) = max[ src(x', y') for x-1<=x'<=x+1, y-1<=y'<=y+1 ] = max({tl, tc, tr, ml, xy, mr, bl, bc, br})
+    */
+    SimpleTensor<T> dst(src.shape(), src.data_type());
+
+    for(int i = 0; i < src.num_elements(); ++i)
+    {
+        Coordinates coord = index2coord(src.shape(), i);
+        const int   x     = coord.x();
+        const int   y     = coord.y();
+
+        std::array<T, 9> neighbours = { { 0 } };
+        for(int row = y - 1, j = 0; row <= y + 1; ++row)
+        {
+            for(int col = x - 1; col <= x + 1; ++col, ++j)
+            {
+                coord.set(0, col);
+                coord.set(1, row);
+                neighbours[j] = tensor_elem_at(src, coord, border_mode, constant_border_value);
+            }
+        }
+
+        dst[i] = *std::max_element(neighbours.cbegin(), neighbours.cend());
+    }
+
+    return dst;
+}
+
+template SimpleTensor<uint8_t> dilate(const SimpleTensor<uint8_t> &src, BorderMode border_mode, uint8_t constant_border_value);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
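
The comment block in dilate() defines each output pixel as the maximum over its 3x3 neighbourhood, with out-of-image pixels resolved by tensor_elem_at() according to the border mode. A self-contained toy version of the same rule, assuming BorderMode::CONSTANT with a border value of 0 (illustrative only, not the reference implementation):

    #include <algorithm>
    #include <array>
    #include <cstdint>
    #include <cstdio>

    // Sketch only: 3x3 max filter over a 4x4 image with a constant border of 0.
    // The single 9 at (1,1) grows into a 3x3 block of 9s, as dilation should.
    int main()
    {
        constexpr int W = 4, H = 4;
        const std::array<uint8_t, W * H> src = { 0, 0, 0, 0,
                                                 0, 9, 0, 0,
                                                 0, 0, 0, 0,
                                                 0, 0, 0, 7 };
        std::array<uint8_t, W * H> dst{};

        for(int y = 0; y < H; ++y)
        {
            for(int x = 0; x < W; ++x)
            {
                uint8_t m = 0; // constant border value
                for(int dy = -1; dy <= 1; ++dy)
                {
                    for(int dx = -1; dx <= 1; ++dx)
                    {
                        const int xx = x + dx, yy = y + dy;
                        if(xx >= 0 && xx < W && yy >= 0 && yy < H)
                        {
                            m = std::max(m, src[yy * W + xx]);
                        }
                    }
                }
                dst[y * W + x] = m;
            }
        }
        std::printf("%d %d\n", dst[0], dst[1 * W + 1]); // prints "9 9"
        return 0;
    }
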
diff --git a/tests/validation/CPP/Scale.h b/tests/validation/reference/Dilate.h
similarity index 82%
copy from tests/validation/CPP/Scale.h
copy to tests/validation/reference/Dilate.h
index 53183ae..6586cbf 100644
--- a/tests/validation/CPP/Scale.h
+++ b/tests/validation/reference/Dilate.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_SCALE_H__
-#define __ARM_COMPUTE_TEST_SCALE_H__
+#ifndef __ARM_COMPUTE_TEST_DILATE_H__
+#define __ARM_COMPUTE_TEST_DILATE_H__
 
 #include "tests/SimpleTensor.h"
 
@@ -35,9 +35,9 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> scale(const SimpleTensor<T> &in, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value = 0);
+SimpleTensor<T> dilate(const SimpleTensor<T> &src, BorderMode border_mode, T constant_border_value = 0);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_SCALE_H__ */
+#endif /* __ARM_COMPUTE_TEST_DILATE_H__ */
diff --git a/tests/validation/reference/Erode.cpp b/tests/validation/reference/Erode.cpp
new file mode 100644
index 0000000..5e8b36f
--- /dev/null
+++ b/tests/validation/reference/Erode.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Erode.h"
+
+#include "Utils.h"
+#include "tests/validation/Helpers.h"
+
+#include <algorithm>
+#include <array>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> erode(const SimpleTensor<T> &src, BorderMode border_mode, T constant_border_value)
+{
+    /*
+             -1   x  +1
+         -1 [tl][tc][tr] -1
+          y [ml][xy][mr]  y
+         +1 [bl][bc][br] +1
+             -1   x  +1
+        erode:
+        dst(x, y) = min[ src(x', y') for x-1<=x'<=x+1, y-1<=y'<=y+1 ] = min({tl, tc, tr, ml, xy, mr, bl, bc, br})
+    */
+    SimpleTensor<T> dst(src.shape(), src.data_type());
+
+    for(int i = 0; i < src.num_elements(); ++i)
+    {
+        Coordinates coord = index2coord(src.shape(), i);
+        const int   x     = coord.x();
+        const int   y     = coord.y();
+
+        std::array<T, 9> neighbours = { { 0 } };
+        for(int row = y - 1, j = 0; row <= y + 1; ++row)
+        {
+            for(int col = x - 1; col <= x + 1; ++col, ++j)
+            {
+                coord.set(0, col);
+                coord.set(1, row);
+                neighbours[j] = tensor_elem_at(src, coord, border_mode, constant_border_value);
+            }
+        }
+
+        dst[i] = *std::min_element(neighbours.cbegin(), neighbours.cend());
+    }
+
+    return dst;
+}
+
+template SimpleTensor<uint8_t> erode(const SimpleTensor<uint8_t> &src, BorderMode border_mode, uint8_t constant_border_value);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
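
erode() is the min counterpart of dilate() above. One behaviour worth keeping in mind when writing expectations: with BorderMode::CONSTANT and a border value of 0, every pixel in the outermost ring sees that 0, so the frame of the output is zeroed regardless of the input. A toy check of that (illustrative only, not the reference implementation):

    #include <algorithm>
    #include <array>
    #include <cstdint>
    #include <cstdio>

    // Sketch only: 3x3 min filter over an all-255 image with a constant border of 0.
    // The outer ring erodes to 0 while the interior stays at 255.
    int main()
    {
        constexpr int W = 4, H = 4;
        std::array<uint8_t, W * H> src;
        src.fill(255);
        std::array<uint8_t, W * H> dst{};

        for(int y = 0; y < H; ++y)
        {
            for(int x = 0; x < W; ++x)
            {
                uint8_t m = 255;
                for(int dy = -1; dy <= 1; ++dy)
                {
                    for(int dx = -1; dx <= 1; ++dx)
                    {
                        const int     xx = x + dx, yy = y + dy;
                        const uint8_t v  = (xx >= 0 && xx < W && yy >= 0 && yy < H) ? src[yy * W + xx] : 0;
                        m                = std::min(m, v);
                    }
                }
                dst[y * W + x] = m;
            }
        }
        std::printf("corner=%d centre=%d\n", dst[0], dst[1 * W + 1]); // corner=0 centre=255
        return 0;
    }
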
diff --git a/tests/validation/CPP/Scale.h b/tests/validation/reference/Erode.h
similarity index 82%
copy from tests/validation/CPP/Scale.h
copy to tests/validation/reference/Erode.h
index 53183ae..5e27513 100644
--- a/tests/validation/CPP/Scale.h
+++ b/tests/validation/reference/Erode.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_SCALE_H__
-#define __ARM_COMPUTE_TEST_SCALE_H__
+#ifndef __ARM_COMPUTE_TEST_ERODE_H__
+#define __ARM_COMPUTE_TEST_ERODE_H__
 
 #include "tests/SimpleTensor.h"
 
@@ -35,9 +35,9 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> scale(const SimpleTensor<T> &in, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value = 0);
+SimpleTensor<T> erode(const SimpleTensor<T> &src, BorderMode border_mode, T constant_border_value = 0);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_SCALE_H__ */
+#endif /* __ARM_COMPUTE_TEST_ERODE_H__ */
diff --git a/tests/validation/CPP/FixedPoint.cpp b/tests/validation/reference/FixedPoint.cpp
similarity index 100%
rename from tests/validation/CPP/FixedPoint.cpp
rename to tests/validation/reference/FixedPoint.cpp
diff --git a/tests/validation/CPP/FixedPoint.h b/tests/validation/reference/FixedPoint.h
similarity index 100%
rename from tests/validation/CPP/FixedPoint.h
rename to tests/validation/reference/FixedPoint.h
diff --git a/tests/validation/reference/FixedPointPixelWiseMultiplication.cpp b/tests/validation/reference/FixedPointPixelWiseMultiplication.cpp
new file mode 100644
index 0000000..636919b
--- /dev/null
+++ b/tests/validation/reference/FixedPointPixelWiseMultiplication.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "FixedPointPixelWiseMultiplication.h"
+
+#include "tests/validation/FixedPoint.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> fixed_point_pixel_wise_multiplication(const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, float scale, ConvertPolicy convert_policy)
+{
+    using namespace fixed_point_arithmetic;
+
+    SimpleTensor<T> dst(src2.shape(), src2.data_type(), 1, src2.fixed_point_position());
+
+    const int fixed_point_position = src1.fixed_point_position();
+
+    ARM_COMPUTE_ERROR_ON_MSG(src1.data_type() != src2.data_type() || src1.data_type() != dst.data_type(),
+                             "Tensors must all have the same DataType");
+    ARM_COMPUTE_ERROR_ON_MSG(fixed_point_position != src2.fixed_point_position() || fixed_point_position != dst.fixed_point_position(),
+                             "Fixed-point position must be the same for both inputs and outputs");
+
+    // Validate fixed_point_position
+    ARM_COMPUTE_ERROR_ON((src1.data_type() == DataType::QS8) && (fixed_point_position == 0 || fixed_point_position > 7));
+    ARM_COMPUTE_ERROR_ON((src1.data_type() == DataType::QS16) && (fixed_point_position == 0 || fixed_point_position > 15));
+
+    const fixed_point<T> fp_scale(scale, fixed_point_position);
+    const bool           is_sat = convert_policy == ConvertPolicy::SATURATE;
+
+    for(int i = 0; i < src1.num_elements(); ++i)
+    {
+        const fixed_point<T> val1(src1[i], fixed_point_position, true);
+        fixed_point<T>       res(src2[i], fixed_point_position, true);
+        if(is_sat)
+        {
+            res = mul(mul(res, val1), fp_scale);
+        }
+        else
+        {
+            res = mul<OverflowPolicy::WRAP>(mul<OverflowPolicy::WRAP>(res, val1), fp_scale);
+        }
+        dst[i] = res.raw();
+    }
+
+    return dst;
+}
+
+// *INDENT-OFF*
+// clang-format off
+template SimpleTensor<qint8_t> fixed_point_pixel_wise_multiplication(const SimpleTensor<qint8_t> &src1, const SimpleTensor<qint8_t> &src2, float scale, ConvertPolicy convert_policy);
+template SimpleTensor<qint16_t> fixed_point_pixel_wise_multiplication(const SimpleTensor<qint16_t> &src1, const SimpleTensor<qint16_t> &src2, float scale, ConvertPolicy convert_policy);
+// *INDENT-ON*
+// clang-format on
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
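
The loop above performs two fixed-point multiplications per element through the fixed_point<T> wrapper (src1 * src2, then by the scale). Stripped of the wrapper, saturation and overflow policy, a single QS8 multiply on raw values with fixed_point_position = fp reduces to a rescale by 2^fp with round-to-nearest. A sketch of just that arithmetic, not the library's fixed_point implementation:

    #include <cstdint>
    #include <cstdio>

    // Sketch only: with fixed_point_position fp, raw value r represents r / 2^fp,
    // so the raw product must be shifted back down by fp bits (round-to-nearest here).
    int8_t qs8_mul(int8_t a, int8_t b, int fp)
    {
        const int32_t round_up = 1 << (fp - 1);
        return static_cast<int8_t>((static_cast<int32_t>(a) * b + round_up) >> fp);
    }

    int main()
    {
        // With fp = 5, raw 16 represents 0.5; 0.5 * 0.5 = 0.25, i.e. raw 8.
        std::printf("%d\n", qs8_mul(16, 16, 5)); // prints 8
        return 0;
    }
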
diff --git a/tests/validation/CPP/ArithmeticSubtraction.h b/tests/validation/reference/FixedPointPixelWiseMultiplication.h
similarity index 78%
copy from tests/validation/CPP/ArithmeticSubtraction.h
copy to tests/validation/reference/FixedPointPixelWiseMultiplication.h
index 18b0d12..124a33c 100644
--- a/tests/validation/CPP/ArithmeticSubtraction.h
+++ b/tests/validation/reference/FixedPointPixelWiseMultiplication.h
@@ -21,11 +21,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_ARITHMETIC_SUBTRACTION_H__
-#define __ARM_COMPUTE_TEST_ARITHMETIC_SUBTRACTION_H__
+#ifndef __ARM_COMPUTE_TEST_FIXED_POINT_PIXEL_WISE_MULTIPLICATION_H__
+#define __ARM_COMPUTE_TEST_FIXED_POINT_PIXEL_WISE_MULTIPLICATION_H__
 
 #include "tests/SimpleTensor.h"
-#include "tests/validation/Helpers.h"
 
 namespace arm_compute
 {
@@ -36,9 +35,9 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> arithmetic_subtraction(const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
+SimpleTensor<T> fixed_point_pixel_wise_multiplication(const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, float scale, ConvertPolicy convert_policy);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_ARITHMETIC_SUBTRACTION_H__ */
+#endif /* __ARM_COMPUTE_TEST_FIXED_POINT_PIXEL_WISE_MULTIPLICATION_H__ */
diff --git a/tests/validation/CPP/FlattenLayer.cpp b/tests/validation/reference/FlattenLayer.cpp
similarity index 100%
rename from tests/validation/CPP/FlattenLayer.cpp
rename to tests/validation/reference/FlattenLayer.cpp
diff --git a/tests/validation/CPP/FlattenLayer.h b/tests/validation/reference/FlattenLayer.h
similarity index 100%
rename from tests/validation/CPP/FlattenLayer.h
rename to tests/validation/reference/FlattenLayer.h
diff --git a/tests/validation/CPP/Floor.cpp b/tests/validation/reference/Floor.cpp
similarity index 100%
rename from tests/validation/CPP/Floor.cpp
rename to tests/validation/reference/Floor.cpp
diff --git a/tests/validation/CPP/Floor.h b/tests/validation/reference/Floor.h
similarity index 100%
rename from tests/validation/CPP/Floor.h
rename to tests/validation/reference/Floor.h
diff --git a/tests/validation/reference/FullyConnectedLayer.cpp b/tests/validation/reference/FullyConnectedLayer.cpp
new file mode 100644
index 0000000..c24881e
--- /dev/null
+++ b/tests/validation/reference/FullyConnectedLayer.cpp
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "FullyConnectedLayer.h"
+
+#include "arm_compute/core/Types.h"
+#include "tests/validation/FixedPoint.h"
+#include "tests/validation/reference/UtilsQuantizedAsymm.h"
+
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+
+#include <numeric>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+// Vector matrix multiply for floating point
+template < typename T, typename TB, typename std::enable_if < is_floating_point<T>::value &&is_floating_point<TB>::value, int >::type = 0 >
+void vector_matrix_multiply(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &dst, int offset_src, int offset_dst, int cols_weights,
+                            int rows_weights, uint8_t fixed_point_position)
+{
+    ARM_COMPUTE_UNUSED(fixed_point_position);
+
+    const T  *src_ptr     = src.data() + offset_src;
+    const T  *weights_ptr = weights.data();
+    const TB *bias_ptr    = bias.data();
+    T        *dst_ptr     = dst.data() + offset_dst;
+
+    for(int y = 0; y < rows_weights; ++y)
+    {
+        dst_ptr[y] = std::inner_product(src_ptr, src_ptr + cols_weights, weights_ptr, static_cast<T>(0)) + bias_ptr[y];
+        weights_ptr += cols_weights;
+    }
+}
+
+// Vector matrix multiply for fixed point type
+template < typename T, typename TB, typename std::enable_if < std::is_integral<T>::value &&std::is_integral<TB>::value, int >::type = 0 >
+void vector_matrix_multiply(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &dst, int offset_src, int offset_dst, int cols_weights,
+                            int rows_weights, uint8_t fixed_point_position)
+{
+    const T  *src_ptr     = src.data() + offset_src;
+    const T  *weights_ptr = weights.data();
+    const TB *bias_ptr    = bias.data();
+    T        *dst_ptr     = dst.data() + offset_dst;
+
+    using namespace fixed_point_arithmetic;
+    using promoted_type = fixed_point_arithmetic::traits::promote_t<T>;
+
+    for(int y = 0; y < rows_weights; ++y)
+    {
+        // Reset accumulator
+        fixed_point<promoted_type> acc(0, fixed_point_position);
+
+        for(int x = 0; x < cols_weights; ++x)
+        {
+            const fixed_point<promoted_type> i_value(src_ptr[x], fixed_point_position, true);
+            const fixed_point<promoted_type> w_value(weights_ptr[x], fixed_point_position, true);
+            acc = acc + i_value * w_value;
+        }
+
+        // Get the bias
+        const fixed_point<T> b(bias_ptr[y], fixed_point_position, true);
+
+        // Convert back and accumulate the bias
+        fixed_point<T> res(acc);
+        res = res + b;
+
+        // Store the result
+        dst_ptr[y] = res.raw();
+
+        weights_ptr += cols_weights;
+    }
+}
+
+// Vector matrix multiply for quantized type
+template <>
+void vector_matrix_multiply(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, SimpleTensor<uint8_t> &dst, int offset_src, int offset_dst,
+                            int cols_weights, int rows_weights, uint8_t fixed_point_position)
+{
+    ARM_COMPUTE_UNUSED(fixed_point_position);
+
+    const uint8_t *src_ptr     = src.data() + offset_src;
+    const uint8_t *weights_ptr = weights.data();
+    const int32_t *bias_ptr    = bias.data();
+    uint8_t       *dst_ptr     = dst.data() + offset_dst;
+
+    const int   input_offset   = -src.quantization_info().offset;
+    const float input_scale    = src.quantization_info().scale;
+    const int   weights_offset = -weights.quantization_info().offset;
+    const float weights_scale  = weights.quantization_info().scale;
+    const int   output_offset  = dst.quantization_info().offset;
+    const float output_scale   = dst.quantization_info().scale;
+
+    int         output_multiplier = 0;
+    int         output_shift      = 0;
+    const float multiplier        = input_scale * weights_scale / output_scale;
+    arm_compute::quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift);
+
+    for(int y = 0; y < rows_weights; ++y)
+    {
+        // Reset accumulator
+        int32_t acc = 0;
+
+        for(int x = 0; x < cols_weights; ++x)
+        {
+            acc += (src_ptr[x] + input_offset) * (weights_ptr[x] + weights_offset);
+        }
+
+        // Accumulate the bias
+        acc += bias_ptr[y];
+
+        acc = asymm_rounding_divide_by_pow2(asymm_int_mult(acc, output_multiplier), output_shift);
+        acc += output_offset;
+        acc = clamp<int32_t>(acc, 0, 255);
+
+        // Store the result
+        dst_ptr[y] = static_cast<uint8_t>(acc);
+
+        weights_ptr += cols_weights;
+    }
+}
+} // namespace
+
+template <typename T, typename TB>
+SimpleTensor<T> fully_connected_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &dst_shape)
+{
+    // Create reference
+    SimpleTensor<T> dst{ TensorShape{ dst_shape }, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() };
+
+    // Sanity checks
+    const int          num_batch_dimensions = std::max(0, static_cast<int>(dst_shape.num_dimensions()) - 1);
+    const int          num_input_dimensions = src.shape().num_dimensions() - num_batch_dimensions;
+    const unsigned int linear_input_size    = src.shape().total_size_lower(num_input_dimensions);
+
+    ARM_COMPUTE_UNUSED(num_batch_dimensions);
+    ARM_COMPUTE_UNUSED(num_input_dimensions);
+    ARM_COMPUTE_UNUSED(linear_input_size);
+    ARM_COMPUTE_ERROR_ON(weights.shape().x() != linear_input_size);
+    ARM_COMPUTE_ERROR_ON(weights.shape().y() != bias.shape().x());
+    ARM_COMPUTE_ERROR_ON(weights.shape().y() != dst.shape().x());
+
+    // Compute reference
+    const int cols_weights = weights.shape().x();
+    const int rows_weights = weights.shape().y();
+    const int num_batches  = dst_shape.total_size_upper(1);
+
+    for(int k = 0; k < num_batches; ++k)
+    {
+        const int offset_in  = k * cols_weights;
+        const int offset_out = k * rows_weights;
+
+        vector_matrix_multiply<T>(src,
+                                  weights,
+                                  bias,
+                                  dst,
+                                  offset_in,
+                                  offset_out,
+                                  cols_weights,
+                                  rows_weights,
+                                  src.fixed_point_position());
+    }
+
+    return dst;
+}
+
+template SimpleTensor<float> fully_connected_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, const TensorShape &dst_shape);
+template SimpleTensor<half> fully_connected_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, const TensorShape &dst_shape);
+template SimpleTensor<qint8_t> fully_connected_layer(const SimpleTensor<qint8_t> &src, const SimpleTensor<qint8_t> &weights, const SimpleTensor<qint8_t> &bias, const TensorShape &dst_shape);
+template SimpleTensor<qint16_t> fully_connected_layer(const SimpleTensor<qint16_t> &src, const SimpleTensor<qint16_t> &weights, const SimpleTensor<qint16_t> &bias, const TensorShape &dst_shape);
+template SimpleTensor<uint8_t> fully_connected_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, const TensorShape &dst_shape);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
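
In the QASYMM8 specialisation above, each output neuron is a plain 32-bit dot product of offset-corrected inputs and weights plus its bias, which is then requantized as in the depthwise example earlier. A standalone sketch of just the accumulation (standard library only, values purely illustrative):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Sketch only: per-output-neuron accumulation before requantization. The offsets
    // are the negated quantization offsets of the input and weight tensors.
    int32_t quantized_dot(const std::vector<uint8_t> &src, const std::vector<uint8_t> &w,
                          int32_t input_offset, int32_t weights_offset, int32_t bias)
    {
        int32_t acc = bias;
        for(size_t i = 0; i < src.size(); ++i)
        {
            acc += (static_cast<int32_t>(src[i]) + input_offset) * (static_cast<int32_t>(w[i]) + weights_offset);
        }
        return acc;
    }

    int main()
    {
        // Two inputs, one neuron, zero point 128 on both tensors: 2*1 + (-2)*(-1) + 10 = 14.
        std::printf("%d\n", quantized_dot({ 130, 126 }, { 129, 127 }, -128, -128, 10));
        return 0;
    }
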
diff --git a/tests/validation/CPP/FullyConnectedLayer.h b/tests/validation/reference/FullyConnectedLayer.h
similarity index 92%
rename from tests/validation/CPP/FullyConnectedLayer.h
rename to tests/validation/reference/FullyConnectedLayer.h
index 05c570a..1dfb496 100644
--- a/tests/validation/CPP/FullyConnectedLayer.h
+++ b/tests/validation/reference/FullyConnectedLayer.h
@@ -35,8 +35,8 @@
 {
 namespace reference
 {
-template <typename T>
-SimpleTensor<T> fully_connected_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<T> &bias, const TensorShape &dst_shape);
+template <typename T, typename TB>
+SimpleTensor<T> fully_connected_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &dst_shape);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/CPP/GEMM.cpp b/tests/validation/reference/GEMM.cpp
similarity index 100%
rename from tests/validation/CPP/GEMM.cpp
rename to tests/validation/reference/GEMM.cpp
diff --git a/tests/validation/CPP/GEMM.h b/tests/validation/reference/GEMM.h
similarity index 100%
rename from tests/validation/CPP/GEMM.h
rename to tests/validation/reference/GEMM.h
diff --git a/tests/validation/reference/GEMMInterleave4x4.h b/tests/validation/reference/GEMMInterleave4x4.h
new file mode 100644
index 0000000..e6b09af
--- /dev/null
+++ b/tests/validation/reference/GEMMInterleave4x4.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GEMM.h"
+
+#include "arm_compute/core/Types.h"
+#include "tests/validation/FixedPoint.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> gemm_interleave_4x4(const SimpleTensor<T> &in, SimpleTensor<T> &out)
+{
+    const T      *mtx_in     = reinterpret_cast<const T *>(in.data());
+    T            *mtx_ref    = reinterpret_cast<T *>(out.data());
+    const int32_t in_rows    = in.shape().y();
+    const int32_t in_cols    = in.shape().x();
+    const int32_t out_stride = out.shape().x();
+    int32_t       y          = 0;
+    for(; y <= (in_rows - 4); y += 4)
+    {
+        const T *in_ptr = &mtx_in[y * in_cols];
+
+        for(int32_t x = 0; x < in_cols; x++)
+        {
+            const T tmp[4] = { in_ptr[x + 0 * in_cols],
+                               in_ptr[x + 1 * in_cols],
+                               in_ptr[x + 2 * in_cols],
+                               in_ptr[x + 3 * in_cols]
+                             };
+
+            T *dst = &mtx_ref[static_cast<size_t>(x * 4.f) + static_cast<size_t>(std::ceil(y / 4.f)) * out_stride];
+            memcpy(dst, tmp, sizeof(T) * 4);
+        }
+    }
+
+    // Leftover along the Y direction
+    const int32_t leftover_y = in_rows - y;
+
+    if(leftover_y != 0)
+    {
+        const T *in_ptr = &mtx_in[y * in_cols];
+
+        for(int32_t x = 0; x < in_cols; x++)
+        {
+            T tmp[4] = { 0, 0, 0, 0 };
+
+            for(int32_t k = 0; k < leftover_y; k++)
+            {
+                tmp[k] = in_ptr[k * in_cols + x];
+            }
+            T *dst = &mtx_ref[static_cast<size_t>(x * 4.f) + static_cast<size_t>(std::ceil(y / 4.f)) * out_stride];
+            memcpy(dst, tmp, sizeof(T) * 4);
+        }
+    }
+
+    return out;
+}
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
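
gemm_interleave_4x4() walks four consecutive input rows at a time and writes them out column by column, so groups of four vertically adjacent elements become contiguous in the output. A toy illustration of the index mapping for a 4x2 block (illustrative only):

    #include <cstdio>

    // Sketch only: interleaving the 4x2 block
    //   a b
    //   c d
    //   e f
    //   g h
    // produces the single output row: a c e g  b d f h.
    int main()
    {
        const char in[4][2] = { { 'a', 'b' }, { 'c', 'd' }, { 'e', 'f' }, { 'g', 'h' } };
        char       out[8];
        for(int x = 0; x < 2; ++x) // columns of the input block
        {
            for(int r = 0; r < 4; ++r) // four interleaved rows
            {
                out[x * 4 + r] = in[r][x];
            }
        }
        std::printf("%.8s\n", out); // prints "acegbdfh"
        return 0;
    }
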
diff --git a/tests/validation/reference/GEMMInterleaveBlocked.h b/tests/validation/reference/GEMMInterleaveBlocked.h
new file mode 100644
index 0000000..ff5a0d6
--- /dev/null
+++ b/tests/validation/reference/GEMMInterleaveBlocked.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GEMM.h"
+
+#include "arm_compute/core/Types.h"
+#include "tests/validation/FixedPoint.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+T safe_read(const SimpleTensor<T> &t, int y, int x)
+{
+    const int stride = t.shape().x();
+    const int M      = t.shape().y();
+    const int N      = t.shape().x();
+    if((y < M) && (x < N))
+    {
+        return t[y * stride + x];
+    }
+    return 0;
+}
+
+template <typename T>
+SimpleTensor<T> gemm_interleave_blocked(const SimpleTensor<T> &in, SimpleTensor<T> &out, int int_by, int block, bool transposed)
+{
+    const int M = out.shape().y();
+    const int N = out.shape().x();
+    for(int y = 0; y < M; y++)
+    {
+        T *out_ptr = &out[y * N];
+        for(int x = 0; x < (N / int_by); x += block)
+        {
+            for(int z = 0; z < int_by; z++)
+            {
+                for(int a = 0; (out_ptr <= &out[y * N + (N - 1)]) && a < block; a++)
+                {
+                    if(!transposed)
+                        *out_ptr++ = safe_read(in, (y * int_by) + z, x + a);
+                    else
+                    {
+                        const T value = safe_read(in, x + a, (y * int_by) + z);
+                        *out_ptr++    = value;
+                    }
+                }
+            }
+        }
+    }
+    return out;
+}
+
+template SimpleTensor<uint8_t> gemm_interleave_blocked(const SimpleTensor<uint8_t> &in, SimpleTensor<uint8_t> &out, int int_by, int block, bool transposed);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/GEMMLowp.cpp b/tests/validation/reference/GEMMLowp.cpp
new file mode 100644
index 0000000..8e41aef
--- /dev/null
+++ b/tests/validation/reference/GEMMLowp.cpp
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GEMMLowp.h"
+
+#include "arm_compute/core/Types.h"
+#include "tests/validation/reference/UtilsQuantizedAsymm.h"
+
+#include <limits>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+template <typename T>
+void quantize_down_int32_to_uint8_scale(const SimpleTensor<T> *in, const SimpleTensor<T> *bias, SimpleTensor<uint8_t> *dst, int32_t result_offset, int32_t result_mult_int, int32_t result_shift,
+                                        int32_t min, int32_t max)
+{
+    const int cols_in = in->shape().x();
+
+    for(int i = 0; i < in->num_elements(); ++i)
+    {
+        int32_t result = ((*in)[i] + result_offset);
+
+        if(bias != nullptr)
+        {
+            result += (*bias)[i % cols_in];
+        }
+
+        result *= result_mult_int;
+
+        result >>= result_shift;
+
+        // Bounded ReLu
+        if(min != max)
+        {
+            result = std::max(min, std::min(max, result));
+        }
+
+        (*dst)[i] = static_cast<uint8_t>(std::max(0, std::min(255, result)));
+    }
+}
+
+template <typename T>
+void quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> *in, const SimpleTensor<T> *bias, SimpleTensor<uint8_t> *dst, int32_t result_fixedpoint_multiplier, int32_t result_shift,
+                                                      int32_t result_offset_after_shift, int32_t min, int32_t max)
+{
+    const int cols_in = in->shape().x();
+
+    for(int i = 0; i < in->num_elements(); ++i)
+    {
+        int32_t result = (*in)[i];
+
+        if(bias != nullptr)
+        {
+            result += (*bias)[i % cols_in];
+        }
+
+        // Fixed point multiplication
+        result = asymm_rounding_divide_by_pow2(asymm_int_mult(result, result_fixedpoint_multiplier), result_shift);
+        result += result_offset_after_shift;
+
+        // Bounded ReLu
+        if(min != max)
+        {
+            result = std::max(min, std::min(max, result));
+        }
+
+        (*dst)[i] = static_cast<uint8_t>(std::max(0, std::min(255, result)));
+    }
+}
+} // namespace
+
+template <typename T_out, typename T_in>
+SimpleTensor<T_out> gemmlowp_matrix_multiply_core(const SimpleTensor<T_in> &a, const SimpleTensor<T_in> &b, int32_t a_offset, int32_t b_offset)
+{
+    static_assert(std::is_same<typename std::decay<T_out>::type, int32_t>::value, "Only int32_t is allowed for the output");
+
+    TensorShape         shape(b.shape()[0], a.shape()[1]);
+    DataType            dt = std::is_same<T_out, int32_t>::value ? DataType::S32 : DataType::U32;
+    SimpleTensor<T_out> c(shape, dt);
+
+    const int K       = a.shape().x();
+    const int b_width = b.shape().x();
+    const int rows    = c.shape().y(); //M
+    const int cols    = c.shape().x(); //N
+
+    std::vector<T_out> acc;
+    acc.resize(cols);
+
+    for(int i = 0; i < rows; ++i)
+    {
+        for(int j = 0; j < cols; ++j)
+        {
+            acc[j] = 0;
+        }
+        for(int k = 0; k < K; ++k)
+        {
+            const T_out tmp_a = a_offset + static_cast<T_out>(a[k + i * K]);
+            for(int j = 0; j < b_width; ++j)
+            {
+                const T_out tmp_b       = b_offset + static_cast<T_out>(b[j + k * b_width]);
+                const T_out mult_as_int = tmp_a * tmp_b;
+                acc[j] += mult_as_int;
+            }
+        }
+        for(int j = 0; j < cols; ++j)
+        {
+            c[j + i * cols] = acc[j];
+        }
+    }
+
+    return c;
+}
+
+// Used to validate assembly kernels which don't know anything about offsets
+template <typename T1, typename T2>
+SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T2> &b)
+{
+    return gemmlowp_matrix_multiply_core<T1, T2>(a, b, 0, 0);
+}
+
+template <typename T>
+SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max)
+{
+    SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8);
+
+    quantize_down_int32_to_uint8_scale<T>(&in, nullptr, &dst, result_offset, result_mult_int, result_shift, min, max);
+
+    return dst;
+}
+
+template <typename T>
+SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_offset, int32_t result_mult_int, int32_t result_shift,
+                                                                  int32_t min, int32_t max)
+{
+    SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8);
+
+    quantize_down_int32_to_uint8_scale<T>(&in, &bias, &dst, result_offset, result_mult_int, result_shift, min, max);
+
+    return dst;
+}
+
+template <typename T>
+SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, int32_t result_fixedpoint_multiplier, int32_t result_shift,
+                                                                                int32_t result_offset_after_shift, int32_t min,
+                                                                                int32_t max)
+{
+    SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8);
+
+    quantize_down_int32_to_uint8_scale_by_fixedpoint<T>(&in, nullptr, &dst, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
+
+    return dst;
+}
+
+template <typename T>
+SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_fixedpoint_multiplier, int32_t result_shift,
+                                                                                int32_t result_offset_after_shift, int32_t min, int32_t max)
+{
+    SimpleTensor<uint8_t> dst(in.shape(), DataType::QASYMM8);
+
+    quantize_down_int32_to_uint8_scale_by_fixedpoint<T>(&in, &bias, &dst, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
+
+    return dst;
+}
+
+template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, int32_t result_fixedpoint_multiplier, int32_t result_shift,
+                                                                                         int32_t result_offset_after_shift, int32_t min, int32_t max);
+template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_fixedpoint_multiplier,
+                                                                                         int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max);
+template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<int32_t> &a, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min,
+                                                                           int32_t max);
+template SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<int32_t> &a, const SimpleTensor<int32_t> &b, int32_t result_offset, int32_t result_mult_int,
+                                                                           int32_t result_shift, int32_t min, int32_t max);
+template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b, int32_t a_offset, int32_t b_offset);
+template SimpleTensor<int32_t> gemmlowp_matrix_multiply_core(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b, int32_t a_offset, int32_t b_offset);
+template SimpleTensor<int32_t> gemmlowp(const SimpleTensor<int8_t> &a, const SimpleTensor<int8_t> &b);
+template SimpleTensor<int32_t> gemmlowp(const SimpleTensor<uint8_t> &a, const SimpleTensor<uint8_t> &b);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
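
quantize_down_int32_to_uint8_scale() applies, per accumulator, (acc + result_offset [+ bias]) * result_mult_int >> result_shift, then an optional bounded ReLU when min != max, and a final clamp to [0, 255]. A minimal single-value sketch of the unbiased, unbounded case (not the reference function itself):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    // Sketch only: the integer "quantize down" rule for one 32-bit accumulator.
    uint8_t quantize_down(int32_t acc, int32_t result_offset, int32_t result_mult_int, int32_t result_shift)
    {
        int32_t r = (acc + result_offset) * result_mult_int;
        r >>= result_shift;
        return static_cast<uint8_t>(std::max(0, std::min(255, r)));
    }

    int main()
    {
        // (1000 + 24) * 2 >> 3 = 256, clamped to 255.
        std::printf("%d\n", quantize_down(1000, 24, 2, 3)); // prints 255
        return 0;
    }
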
diff --git a/tests/validation/reference/GEMMLowp.h b/tests/validation/reference/GEMMLowp.h
new file mode 100644
index 0000000..a3d0beb
--- /dev/null
+++ b/tests/validation/reference/GEMMLowp.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_GEMMLOWP_H__
+#define __ARM_COMPUTE_TEST_GEMMLOWP_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min = 0, int32_t max = 0);
+template <typename T1, typename T2>
+SimpleTensor<T1> gemmlowp_matrix_multiply_core(const SimpleTensor<T2> &a, const SimpleTensor<T2> &b, int32_t a_offset, int32_t b_offset);
+
+template <typename T>
+SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, int32_t result_offset, int32_t result_mult_int, int32_t result_shift);
+
+template <typename T1, typename T2>
+SimpleTensor<T1> gemmlowp(const SimpleTensor<T2> &a, const SimpleTensor<T2> &b);
+
+template <typename T>
+SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_offset, int32_t result_mult_int, int32_t result_shift,
+                                                                  int32_t min = 0, int32_t max = 0);
+
+template <typename T>
+SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, int32_t result_fixedpoint_multiplier, int32_t result_shift,
+                                                                                int32_t result_offset_after_shift,
+                                                                                int32_t min = 0, int32_t max = 0);
+
+template <typename T>
+SimpleTensor<uint8_t> gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(const SimpleTensor<T> &in, const SimpleTensor<T> &bias, int32_t result_fixedpoint_multiplier, int32_t result_shift,
+                                                                                int32_t result_offset_after_shift, int32_t min = 0, int32_t max = 0);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_GEMMLOWP_H__ */
diff --git a/tests/validation/CPP/Gaussian3x3.cpp b/tests/validation/reference/Gaussian3x3.cpp
similarity index 100%
rename from tests/validation/CPP/Gaussian3x3.cpp
rename to tests/validation/reference/Gaussian3x3.cpp
diff --git a/tests/validation/CPP/Gaussian3x3.h b/tests/validation/reference/Gaussian3x3.h
similarity index 100%
rename from tests/validation/CPP/Gaussian3x3.h
rename to tests/validation/reference/Gaussian3x3.h
diff --git a/tests/validation/CPP/Gaussian5x5.cpp b/tests/validation/reference/Gaussian5x5.cpp
similarity index 100%
rename from tests/validation/CPP/Gaussian5x5.cpp
rename to tests/validation/reference/Gaussian5x5.cpp
diff --git a/tests/validation/CPP/Gaussian5x5.h b/tests/validation/reference/Gaussian5x5.h
similarity index 100%
rename from tests/validation/CPP/Gaussian5x5.h
rename to tests/validation/reference/Gaussian5x5.h
diff --git a/tests/validation/reference/GaussianPyramidHalf.cpp b/tests/validation/reference/GaussianPyramidHalf.cpp
new file mode 100644
index 0000000..0a68ded
--- /dev/null
+++ b/tests/validation/reference/GaussianPyramidHalf.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GaussianPyramidHalf.h"
+
+#include "arm_compute/core/Helpers.h"
+
+#include "Gaussian5x5.h"
+#include "Scale.h"
+#include "Utils.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+std::vector<SimpleTensor<T>> gaussian_pyramid_half(const SimpleTensor<T> &src, BorderMode border_mode, uint8_t constant_border_value, size_t num_levels)
+{
+    std::vector<SimpleTensor<T>> dst;
+
+    // Level0 is equal to src
+    dst.push_back(src);
+
+    for(size_t i = 1; i < num_levels; ++i)
+    {
+        // Gaussian Filter
+        const SimpleTensor<T> out_gaus5x5 = reference::gaussian5x5(dst[i - 1], border_mode, constant_border_value);
+
+        // Scale down by 2 with nearest interpolation
+        const SimpleTensor<T> out = reference::scale(out_gaus5x5, SCALE_PYRAMID_HALF, SCALE_PYRAMID_HALF, InterpolationPolicy::NEAREST_NEIGHBOR, border_mode, constant_border_value, SamplingPolicy::CENTER,
+                                                     true);
+
+        dst.push_back(out);
+    }
+
+    return dst;
+}
+
+template std::vector<SimpleTensor<uint8_t>> gaussian_pyramid_half(const SimpleTensor<uint8_t> &src, BorderMode border_mode, uint8_t constant_border_value, size_t num_levels);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
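
Each pyramid level above is the previous level filtered with the 5x5 Gaussian and then resampled with nearest-neighbour interpolation by SCALE_PYRAMID_HALF (assumed here to be 0.5), so the spatial dimensions halve per level. A toy sketch of the expected level sizes for a hypothetical 64x48 input:

    #include <cstdio>

    // Sketch only: level-0 is the input itself; each later level halves width and height.
    int main()
    {
        int w = 64, h = 48;
        for(int level = 0; level < 4; ++level)
        {
            std::printf("level %d: %dx%d\n", level, w, h);
            w /= 2;
            h /= 2;
        }
        return 0;
    }
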
diff --git a/tests/validation/CPP/Scale.h b/tests/validation/reference/GaussianPyramidHalf.h
similarity index 80%
copy from tests/validation/CPP/Scale.h
copy to tests/validation/reference/GaussianPyramidHalf.h
index 53183ae..abd29e1 100644
--- a/tests/validation/CPP/Scale.h
+++ b/tests/validation/reference/GaussianPyramidHalf.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_SCALE_H__
-#define __ARM_COMPUTE_TEST_SCALE_H__
+#ifndef __ARM_COMPUTE_TEST_GAUSSIAN_PYRAMID_HALF_H__
+#define __ARM_COMPUTE_TEST_GAUSSIAN_PYRAMID_HALF_H__
 
 #include "tests/SimpleTensor.h"
 
@@ -35,9 +35,9 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> scale(const SimpleTensor<T> &in, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value = 0);
+std::vector<SimpleTensor<T>> gaussian_pyramid_half(const SimpleTensor<T> &src, BorderMode border_mode, uint8_t constant_border_value, size_t num_levels);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_SCALE_H__ */
+#endif /* __ARM_COMPUTE_TEST_GAUSSIAN_PYRAMID_HALF_H__ */
\ No newline at end of file
diff --git a/tests/validation/CPP/HarrisCornerDetector.cpp b/tests/validation/reference/HarrisCornerDetector.cpp
similarity index 95%
rename from tests/validation/CPP/HarrisCornerDetector.cpp
rename to tests/validation/reference/HarrisCornerDetector.cpp
index 3babfee..442f6a1 100644
--- a/tests/validation/CPP/HarrisCornerDetector.cpp
+++ b/tests/validation/reference/HarrisCornerDetector.cpp
@@ -24,9 +24,9 @@
 #include "HarrisCornerDetector.h"
 
 #include "Utils.h"
-#include "tests/validation/CPP/NonMaximaSuppression.h"
-#include "tests/validation/CPP/Sobel.h"
 #include "tests/validation/Helpers.h"
+#include "tests/validation/reference/NonMaximaSuppression.h"
+#include "tests/validation/reference/Sobel.h"
 
 namespace arm_compute
 {
@@ -45,7 +45,7 @@
     SimpleTensor<T> grad_y;
     float           norm_factor = 0.f;
 
-    std::tie(grad_x, grad_y) = sobel<T>(src, gradient_size, border_mode, constant_border_value);
+    std::tie(grad_x, grad_y) = sobel<T>(src, gradient_size, border_mode, constant_border_value, GradientDimension::GRAD_XY);
 
     switch(gradient_size)
     {
@@ -99,9 +99,9 @@
         float Gxy = 0.f;
 
         // Calculate Gx^2, Gy^2 and Gxy within the given window
-        for(int y = src_coord.y() - block_size / 2; y <= src_coord.y() + block_size / 2; ++y)
+        for(int y = block_top_left.y(); y <= block_bottom_right.y(); ++y)
         {
-            for(int x = src_coord.x() - block_size / 2; x <= src_coord.x() + block_size / 2; ++x)
+            for(int x = block_top_left.x(); x <= block_bottom_right.x(); ++x)
             {
                 Coordinates block_coord(x, y);
 
@@ -139,7 +139,7 @@
     {
         Coordinates coord = index2coord(suppressed_scores.shape(), i);
 
-        if(is_in_valid_region(suppressed_scores_region, coord) && suppressed_scores[i] > 0.f)
+        if(is_in_valid_region(suppressed_scores_region, coord) && suppressed_scores[i] != 0.f)
         {
             KeyPoint corner;
             corner.x               = coord.x();
diff --git a/tests/validation/CPP/HarrisCornerDetector.h b/tests/validation/reference/HarrisCornerDetector.h
similarity index 100%
rename from tests/validation/CPP/HarrisCornerDetector.h
rename to tests/validation/reference/HarrisCornerDetector.h
diff --git a/tests/validation/CPP/ArithmeticSubtraction.h b/tests/validation/reference/Histogram.cpp
similarity index 60%
copy from tests/validation/CPP/ArithmeticSubtraction.h
copy to tests/validation/reference/Histogram.cpp
index 18b0d12..594c4fb 100644
--- a/tests/validation/CPP/ArithmeticSubtraction.h
+++ b/tests/validation/reference/Histogram.cpp
@@ -21,11 +21,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_ARITHMETIC_SUBTRACTION_H__
-#define __ARM_COMPUTE_TEST_ARITHMETIC_SUBTRACTION_H__
+#include "Histogram.h"
 
-#include "tests/SimpleTensor.h"
-#include "tests/validation/Helpers.h"
+#include "Utils.h"
+#include "arm_compute/core/Helpers.h"
 
 namespace arm_compute
 {
@@ -36,9 +35,31 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> arithmetic_subtraction(const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
+SimpleTensor<uint32_t> histogram(const SimpleTensor<T> &src, size_t num_bins, int32_t offset, uint32_t range)
+{
+    SimpleTensor<uint32_t> dst(TensorShape(num_bins), DataType::U32);
+
+    // Clear the distribution
+    for(size_t element_idx = 0; element_idx < num_bins; ++element_idx)
+    {
+        dst[element_idx] = 0;
+    }
+
+    // Create the histogram
+    for(int element_idx = 0; element_idx < src.num_elements(); ++element_idx)
+    {
+        if((offset <= src[element_idx]) && (src[element_idx] < (offset + range)))
+        {
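+            // Map the value to its bin: the interval [offset, offset + range) is split evenly into num_bins bins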
+            const int index = (src[element_idx] - offset) * num_bins / range;
+            dst[index]++;
+        }
+    }
+
+    return dst;
+}
+
+template SimpleTensor<uint32_t> histogram(const SimpleTensor<uint8_t> &src, size_t num_bins, int32_t offset, uint32_t range);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_ARITHMETIC_SUBTRACTION_H__ */
diff --git a/tests/validation/CPP/Scale.h b/tests/validation/reference/Histogram.h
similarity index 82%
copy from tests/validation/CPP/Scale.h
copy to tests/validation/reference/Histogram.h
index 53183ae..7ec1669 100644
--- a/tests/validation/CPP/Scale.h
+++ b/tests/validation/reference/Histogram.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_SCALE_H__
-#define __ARM_COMPUTE_TEST_SCALE_H__
+#ifndef __ARM_COMPUTE_TEST_HISTOGRAM_H__
+#define __ARM_COMPUTE_TEST_HISTOGRAM_H__
 
 #include "tests/SimpleTensor.h"
 
@@ -35,9 +35,9 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> scale(const SimpleTensor<T> &in, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value = 0);
+SimpleTensor<uint32_t> histogram(const SimpleTensor<T> &src, size_t num_bins, int32_t offset, uint32_t range);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_SCALE_H__ */
+#endif /* __ARM_COMPUTE_TEST_HISTOGRAM_H__ */
diff --git a/tests/validation/CPP/IntegralImage.cpp b/tests/validation/reference/IntegralImage.cpp
similarity index 100%
rename from tests/validation/CPP/IntegralImage.cpp
rename to tests/validation/reference/IntegralImage.cpp
diff --git a/tests/validation/CPP/IntegralImage.h b/tests/validation/reference/IntegralImage.h
similarity index 100%
rename from tests/validation/CPP/IntegralImage.h
rename to tests/validation/reference/IntegralImage.h
diff --git a/tests/validation/CPP/L2Normalize.cpp b/tests/validation/reference/L2NormalizeLayer.cpp
similarity index 98%
rename from tests/validation/CPP/L2Normalize.cpp
rename to tests/validation/reference/L2NormalizeLayer.cpp
index 4fb4d57..99f4e8a 100644
--- a/tests/validation/CPP/L2Normalize.cpp
+++ b/tests/validation/reference/L2NormalizeLayer.cpp
@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "L2Normalize.h"
+#include "L2NormalizeLayer.h"
 #include "ReductionOperation.h"
 
 #include "tests/validation/Helpers.h"
diff --git a/tests/validation/CPP/L2Normalize.h b/tests/validation/reference/L2NormalizeLayer.h
similarity index 100%
rename from tests/validation/CPP/L2Normalize.h
rename to tests/validation/reference/L2NormalizeLayer.h
diff --git a/tests/validation/reference/Magnitude.cpp b/tests/validation/reference/Magnitude.cpp
new file mode 100644
index 0000000..f0002bf
--- /dev/null
+++ b/tests/validation/reference/Magnitude.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Magnitude.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> magnitude(const SimpleTensor<T> &gx, const SimpleTensor<T> &gy, MagnitudeType magnitude_type)
+{
+    SimpleTensor<T> mag(gx.shape(), gx.data_type());
+
+    using intermediate_type = typename common_promoted_unsigned_type<T>::intermediate_type;
+
+    for(int i = 0; i < gx.num_elements(); ++i)
+    {
+        double val = 0.f;
+
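+        // L1 norm: |gx| + |gy|, accumulated in a wider intermediate type to avoid overflow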
+        if(magnitude_type == MagnitudeType::L1NORM)
+        {
+            val = static_cast<intermediate_type>(std::abs(gx[i])) + static_cast<intermediate_type>(std::abs(gy[i]));
+        }
+        else // MagnitudeType::L2NORM
+        {
+            // Note: kernel saturates to uint32_t instead of intermediate_type for S32 format
+            auto sum = static_cast<uint32_t>(gx[i] * gx[i]) + static_cast<uint32_t>(gy[i] * gy[i]);
+            val      = std::sqrt(sum) + 0.5f;
+        }
+
+        mag[i] = saturate_cast<T>(val);
+    }
+
+    return mag;
+}
+
+template SimpleTensor<int16_t> magnitude(const SimpleTensor<int16_t> &gx, const SimpleTensor<int16_t> &gy, MagnitudeType magnitude_type);
+template SimpleTensor<int32_t> magnitude(const SimpleTensor<int32_t> &gx, const SimpleTensor<int32_t> &gy, MagnitudeType magnitude_type);
+template SimpleTensor<half_float::half> magnitude(const SimpleTensor<half_float::half> &gx, const SimpleTensor<half_float::half> &gy, MagnitudeType magnitude_type);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CPP/Scale.h b/tests/validation/reference/Magnitude.h
similarity index 82%
copy from tests/validation/CPP/Scale.h
copy to tests/validation/reference/Magnitude.h
index 53183ae..7562071 100644
--- a/tests/validation/CPP/Scale.h
+++ b/tests/validation/reference/Magnitude.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_SCALE_H__
-#define __ARM_COMPUTE_TEST_SCALE_H__
+#ifndef __ARM_COMPUTE_TEST_MAGNITUDE_H__
+#define __ARM_COMPUTE_TEST_MAGNITUDE_H__
 
 #include "tests/SimpleTensor.h"
 
@@ -35,9 +35,9 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> scale(const SimpleTensor<T> &in, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value = 0);
+SimpleTensor<T> magnitude(const SimpleTensor<T> &gx, const SimpleTensor<T> &gy, MagnitudeType magnitude_type);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_SCALE_H__ */
+#endif /* __ARM_COMPUTE_TEST_MAGNITUDE_H__ */
diff --git a/tests/validation/CPP/MeanStdDev.cpp b/tests/validation/reference/MeanStdDev.cpp
similarity index 100%
rename from tests/validation/CPP/MeanStdDev.cpp
rename to tests/validation/reference/MeanStdDev.cpp
diff --git a/tests/validation/CPP/MeanStdDev.h b/tests/validation/reference/MeanStdDev.h
similarity index 100%
rename from tests/validation/CPP/MeanStdDev.h
rename to tests/validation/reference/MeanStdDev.h
diff --git a/tests/validation/reference/Median3x3.cpp b/tests/validation/reference/Median3x3.cpp
new file mode 100644
index 0000000..91a787a
--- /dev/null
+++ b/tests/validation/reference/Median3x3.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Helpers.h"
+
+#include "Median3x3.h"
+#include "Utils.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+constexpr unsigned int filter_size = 3;              /* Size of the kernel/filter in number of elements. */
+constexpr BorderSize   border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */
+} // namespace
+
+template <typename T>
+SimpleTensor<T> median3x3(const SimpleTensor<T> &src, BorderMode border_mode, T constant_border_value)
+{
+    SimpleTensor<T> dst(src.shape(), src.data_type());
+    const int       size_tot_filter = filter_size * filter_size;
+
+    for(int src_idx = 0; src_idx < src.num_elements(); ++src_idx)
+    {
+        std::array<T, size_tot_filter> filter_elems = { { 0 } };
+        Coordinates id = index2coord(src.shape(), src_idx);
+        const int   x  = id.x();
+        const int   y  = id.y();
+
+        for(int j = y - static_cast<int>(border_size.top), index = 0; j <= y + static_cast<int>(border_size.bottom); ++j)
+        {
+            for(int i = x - static_cast<int>(border_size.left); i <= x + static_cast<int>(border_size.right); ++i, ++index)
+            {
+                id.set(0, i);
+                id.set(1, j);
+                filter_elems[index] = tensor_elem_at(src, id, border_mode, constant_border_value);
+            }
+        }
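+        // The median of the 3x3 neighbourhood is the middle element of the sorted window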
+        std::sort(filter_elems.begin(), filter_elems.end());
+        dst[src_idx] = filter_elems[size_tot_filter / 2];
+    }
+
+    return dst;
+}
+
+template SimpleTensor<uint8_t> median3x3(const SimpleTensor<uint8_t> &src, BorderMode border_mode, uint8_t constant_border_value);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CPP/Scale.h b/tests/validation/reference/Median3x3.h
similarity index 82%
copy from tests/validation/CPP/Scale.h
copy to tests/validation/reference/Median3x3.h
index 53183ae..8345daa 100644
--- a/tests/validation/CPP/Scale.h
+++ b/tests/validation/reference/Median3x3.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_SCALE_H__
-#define __ARM_COMPUTE_TEST_SCALE_H__
+#ifndef __ARM_COMPUTE_TEST_MEDIAN3X3_H__
+#define __ARM_COMPUTE_TEST_MEDIAN3X3_H__
 
 #include "tests/SimpleTensor.h"
 
@@ -35,9 +35,9 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> scale(const SimpleTensor<T> &in, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value = 0);
+SimpleTensor<T> median3x3(const SimpleTensor<T> &src, BorderMode border_mode, T constant_border_value);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_SCALE_H__ */
+#endif /* __ARM_COMPUTE_TEST_MEDIAN3X3_H__ */
diff --git a/tests/validation/CPP/MinMaxLocation.cpp b/tests/validation/reference/MinMaxLocation.cpp
similarity index 100%
rename from tests/validation/CPP/MinMaxLocation.cpp
rename to tests/validation/reference/MinMaxLocation.cpp
diff --git a/tests/validation/CPP/MinMaxLocation.h b/tests/validation/reference/MinMaxLocation.h
similarity index 100%
rename from tests/validation/CPP/MinMaxLocation.h
rename to tests/validation/reference/MinMaxLocation.h
diff --git a/tests/validation/CPP/NonLinearFilter.cpp b/tests/validation/reference/NonLinearFilter.cpp
similarity index 100%
rename from tests/validation/CPP/NonLinearFilter.cpp
rename to tests/validation/reference/NonLinearFilter.cpp
diff --git a/tests/validation/CPP/NonLinearFilter.h b/tests/validation/reference/NonLinearFilter.h
similarity index 100%
rename from tests/validation/CPP/NonLinearFilter.h
rename to tests/validation/reference/NonLinearFilter.h
diff --git a/tests/validation/CPP/NonMaximaSuppression.cpp b/tests/validation/reference/NonMaximaSuppression.cpp
similarity index 100%
rename from tests/validation/CPP/NonMaximaSuppression.cpp
rename to tests/validation/reference/NonMaximaSuppression.cpp
diff --git a/tests/validation/CPP/NonMaximaSuppression.h b/tests/validation/reference/NonMaximaSuppression.h
similarity index 100%
rename from tests/validation/CPP/NonMaximaSuppression.h
rename to tests/validation/reference/NonMaximaSuppression.h
diff --git a/tests/validation/CPP/NormalizationLayer.cpp b/tests/validation/reference/NormalizationLayer.cpp
similarity index 100%
rename from tests/validation/CPP/NormalizationLayer.cpp
rename to tests/validation/reference/NormalizationLayer.cpp
diff --git a/tests/validation/CPP/NormalizationLayer.h b/tests/validation/reference/NormalizationLayer.h
similarity index 100%
rename from tests/validation/CPP/NormalizationLayer.h
rename to tests/validation/reference/NormalizationLayer.h
diff --git a/tests/validation/CPP/ArithmeticSubtraction.h b/tests/validation/reference/Permute.cpp
similarity index 60%
copy from tests/validation/CPP/ArithmeticSubtraction.h
copy to tests/validation/reference/Permute.cpp
index 18b0d12..4a12ca6 100644
--- a/tests/validation/CPP/ArithmeticSubtraction.h
+++ b/tests/validation/reference/Permute.cpp
@@ -21,10 +21,9 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_ARITHMETIC_SUBTRACTION_H__
-#define __ARM_COMPUTE_TEST_ARITHMETIC_SUBTRACTION_H__
+#include "Permute.h"
 
-#include "tests/SimpleTensor.h"
+#include "arm_compute/core/Types.h"
 #include "tests/validation/Helpers.h"
 
 namespace arm_compute
@@ -36,9 +35,32 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> arithmetic_subtraction(const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
+SimpleTensor<T> permute(const SimpleTensor<T> &src, PermutationVector perm)
+{
+    // Permute shapes
+    TensorShape dst_shape = src.shape();
+    permute(dst_shape, perm);
+
+    // Create reference
+    SimpleTensor<T> dst{ dst_shape, src.data_type() };
+
+    // Compute reference
+    for(int i = 0; i < src.num_elements(); ++i)
+    {
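+        // Move each element to the destination coordinate obtained by permuting its source coordinates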
+        Coordinates coord = index2coord(src.shape(), i);
+        permute(coord, perm);
+        const size_t dst_index = coord2index(dst.shape(), coord);
+
+        dst[dst_index] = src[i];
+    }
+
+    return dst;
+}
+
+template SimpleTensor<uint8_t> permute(const SimpleTensor<uint8_t> &src, PermutationVector perm);
+template SimpleTensor<uint16_t> permute(const SimpleTensor<uint16_t> &src, PermutationVector perm);
+template SimpleTensor<uint32_t> permute(const SimpleTensor<uint32_t> &src, PermutationVector perm);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_ARITHMETIC_SUBTRACTION_H__ */
diff --git a/tests/validation/CPP/Scale.h b/tests/validation/reference/Permute.h
similarity index 82%
copy from tests/validation/CPP/Scale.h
copy to tests/validation/reference/Permute.h
index 53183ae..295ad2b 100644
--- a/tests/validation/CPP/Scale.h
+++ b/tests/validation/reference/Permute.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_SCALE_H__
-#define __ARM_COMPUTE_TEST_SCALE_H__
+#ifndef __ARM_COMPUTE_TEST_PERMUTE_H__
+#define __ARM_COMPUTE_TEST_PERMUTE_H__
 
 #include "tests/SimpleTensor.h"
 
@@ -35,9 +35,9 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> scale(const SimpleTensor<T> &in, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value = 0);
+SimpleTensor<T> permute(const SimpleTensor<T> &src, PermutationVector perm);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_SCALE_H__ */
+#endif /* __ARM_COMPUTE_TEST_PERMUTE_H__ */
diff --git a/tests/validation/reference/Phase.cpp b/tests/validation/reference/Phase.cpp
new file mode 100644
index 0000000..7827cd2
--- /dev/null
+++ b/tests/validation/reference/Phase.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Phase.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<uint8_t> phase(const SimpleTensor<T> &gx, const SimpleTensor<T> &gy, PhaseType phase_type)
+{
+    const float           PI = std::atan(1) * 4;
+    SimpleTensor<uint8_t> phase(gx.shape(), DataType::U8);
+
+    if(phase_type == PhaseType::UNSIGNED) // unsigned: angle mapped to [0, 180) degrees
+    {
+        for(int i = 0; i < gx.num_elements(); ++i)
+        {
+            float angle_deg = (std::atan2(float(gy[i]), float(gx[i])) / PI) * 180.0f;
+            phase[i]        = (angle_deg < 0.0f) ? 180.f + angle_deg : angle_deg;
+        }
+    }
+    else // signed: full [0, 360) angle range mapped to [0, 255]
+    {
+        for(int i = 0; i < gx.num_elements(); ++i)
+        {
+            float angle_pi = std::atan2(gy[i], gx[i]) / PI;
+            angle_pi       = (angle_pi < 0.0f) ? 2 + angle_pi : angle_pi;
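+            // angle_pi is in [0, 2); scale it to [0, 256) and wrap the result to 8 bits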
+            phase[i]       = lround(angle_pi * 128) & 0xFFu;
+        }
+    }
+
+    return phase;
+}
+
+template SimpleTensor<uint8_t> phase(const SimpleTensor<int16_t> &gx, const SimpleTensor<int16_t> &gy, PhaseType phase_type);
+template SimpleTensor<uint8_t> phase(const SimpleTensor<int32_t> &gx, const SimpleTensor<int32_t> &gy, PhaseType phase_type);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CPP/Scale.h b/tests/validation/reference/Phase.h
similarity index 82%
copy from tests/validation/CPP/Scale.h
copy to tests/validation/reference/Phase.h
index 53183ae..d322d53 100644
--- a/tests/validation/CPP/Scale.h
+++ b/tests/validation/reference/Phase.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_SCALE_H__
-#define __ARM_COMPUTE_TEST_SCALE_H__
+#ifndef __ARM_COMPUTE_TEST_PHASE_H__
+#define __ARM_COMPUTE_TEST_PHASE_H__
 
 #include "tests/SimpleTensor.h"
 
@@ -35,9 +35,9 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> scale(const SimpleTensor<T> &in, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value = 0);
+SimpleTensor<uint8_t> phase(const SimpleTensor<T> &gx, const SimpleTensor<T> &gy, PhaseType phase_type);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_SCALE_H__ */
+#endif /* __ARM_COMPUTE_TEST_PHASE_H__ */
diff --git a/tests/validation/reference/PixelWiseMultiplication.cpp b/tests/validation/reference/PixelWiseMultiplication.cpp
new file mode 100644
index 0000000..b3647fc
--- /dev/null
+++ b/tests/validation/reference/PixelWiseMultiplication.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "PixelWiseMultiplication.h"
+
+#include "tests/validation/FixedPoint.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <class T>
+struct is_floating_point
+    : std::integral_constant < bool,
+      std::is_same<float, typename std::remove_cv<T>::type>::value || std::is_same<half_float::half, typename std::remove_cv<T>::type>::value
+      || std::is_same<double, typename std::remove_cv<T>::type>::value || std::is_same<long double, typename std::remove_cv<T>::type>::value >
+{
+};
+
+template <typename T1, typename T2>
+SimpleTensor<T2> pixel_wise_multiplication(const SimpleTensor<T1> &src1, const SimpleTensor<T2> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy)
+{
+    SimpleTensor<T2> dst(src2.shape(), src2.data_type());
+
+    if(scale < 0)
+    {
+        ARM_COMPUTE_ERROR("Scale of pixel-wise multiplication must be non-negative");
+    }
+
+    using intermediate_type = typename common_promoted_signed_type<T1, T2, T2>::intermediate_type;
+
+    for(int i = 0; i < src1.num_elements(); ++i)
+    {
+        double val = static_cast<intermediate_type>(src1[i]) * static_cast<intermediate_type>(src2[i]) * static_cast<double>(scale);
+        if(is_floating_point<T2>::value)
+        {
+            dst[i] = val;
+        }
+        else
+        {
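+            // Integer outputs: apply the requested rounding policy before converting back to the destination type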
+            double rounded_val = 0;
+            switch(rounding_policy)
+            {
+                case(RoundingPolicy::TO_ZERO):
+                    rounded_val = support::cpp11::trunc(val);
+                    break;
+                case(RoundingPolicy::TO_NEAREST_UP):
+                    rounded_val = round_half_up(val);
+                    break;
+                case(RoundingPolicy::TO_NEAREST_EVEN):
+                    rounded_val = round_half_even(val);
+                    break;
+                default:
+                    ARM_COMPUTE_ERROR("Unsupported rounding policy");
+            }
+
+            dst[i] = (convert_policy == ConvertPolicy::SATURATE) ? saturate_cast<T2>(rounded_val) : static_cast<T2>(rounded_val);
+        }
+    }
+
+    return dst;
+}
+
+// *INDENT-OFF*
+// clang-format off
+template SimpleTensor<uint8_t> pixel_wise_multiplication(const SimpleTensor<uint8_t> &src1, const SimpleTensor<uint8_t> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy);
+template SimpleTensor<int16_t> pixel_wise_multiplication(const SimpleTensor<uint8_t> &src1, const SimpleTensor<int16_t> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy);
+template SimpleTensor<int16_t> pixel_wise_multiplication(const SimpleTensor<int16_t> &src1, const SimpleTensor<int16_t> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy);
+template SimpleTensor<float> pixel_wise_multiplication(const SimpleTensor<float> &src1, const SimpleTensor<float> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy);
+template SimpleTensor<half_float::half> pixel_wise_multiplication(const SimpleTensor<half_float::half> &src1, const SimpleTensor<half_float::half> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy);
+// clang-format on
+// *INDENT-ON*
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CPP/ArithmeticSubtraction.h b/tests/validation/reference/PixelWiseMultiplication.h
similarity index 77%
copy from tests/validation/CPP/ArithmeticSubtraction.h
copy to tests/validation/reference/PixelWiseMultiplication.h
index 18b0d12..1dce154 100644
--- a/tests/validation/CPP/ArithmeticSubtraction.h
+++ b/tests/validation/reference/PixelWiseMultiplication.h
@@ -21,11 +21,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_ARITHMETIC_SUBTRACTION_H__
-#define __ARM_COMPUTE_TEST_ARITHMETIC_SUBTRACTION_H__
+#ifndef __ARM_COMPUTE_TEST_PIXEL_WISE_MULTIPLICATION_H__
+#define __ARM_COMPUTE_TEST_PIXEL_WISE_MULTIPLICATION_H__
 
 #include "tests/SimpleTensor.h"
-#include "tests/validation/Helpers.h"
 
 namespace arm_compute
 {
@@ -35,10 +34,10 @@
 {
 namespace reference
 {
-template <typename T>
-SimpleTensor<T> arithmetic_subtraction(const SimpleTensor<T> &src1, const SimpleTensor<T> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
+template <typename T1, typename T2>
+SimpleTensor<T2> pixel_wise_multiplication(const SimpleTensor<T1> &src1, const SimpleTensor<T2> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_ARITHMETIC_SUBTRACTION_H__ */
+#endif /* __ARM_COMPUTE_TEST_PIXEL_WISE_MULTIPLICATION_H__ */
diff --git a/tests/validation/CPP/PoolingLayer.cpp b/tests/validation/reference/PoolingLayer.cpp
similarity index 82%
rename from tests/validation/CPP/PoolingLayer.cpp
rename to tests/validation/reference/PoolingLayer.cpp
index 85a8343..1a7dd4c 100644
--- a/tests/validation/CPP/PoolingLayer.cpp
+++ b/tests/validation/reference/PoolingLayer.cpp
@@ -25,6 +25,7 @@
 
 #include "arm_compute/core/Types.h"
 #include "tests/validation/FixedPoint.h"
+#include "tests/validation/Helpers.h"
 
 namespace arm_compute
 {
@@ -39,10 +40,11 @@
 TensorShape calculate_output_shape(TensorShape shape, PoolingLayerInfo info)
 {
     TensorShape dst_shape = shape;
+    const int   pool_size = info.is_global_pooling() ? shape.x() : info.pool_size();
     const std::pair<unsigned int, unsigned int> scaled_dims = arm_compute::scaled_dimensions(shape.x(),
                                                                                              shape.y(),
-                                                                                             info.pool_size(),
-                                                                                             info.pool_size(),
+                                                                                             pool_size,
+                                                                                             pool_size,
                                                                                              info.pad_stride_info());
     dst_shape.set(0, scaled_dims.first);
     dst_shape.set(1, scaled_dims.second);
@@ -54,12 +56,15 @@
 template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type>
 SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, PoolingLayerInfo info)
 {
-    const int   pool_size     = info.pool_size();
-    PoolingType type          = info.pool_type();
-    int         pool_stride_x = info.pad_stride_info().stride().first;
-    int         pool_stride_y = info.pad_stride_info().stride().second;
-    int         pad_x         = info.pad_stride_info().pad().first;
-    int         pad_y         = info.pad_stride_info().pad().second;
+    ARM_COMPUTE_ERROR_ON(info.is_global_pooling() && (src.shape().x() != src.shape().y()));
+
+    const int   pool_size       = info.is_global_pooling() ? src.shape().x() : info.pool_size();
+    PoolingType type            = info.pool_type();
+    int         pool_stride_x   = info.pad_stride_info().stride().first;
+    int         pool_stride_y   = info.pad_stride_info().stride().second;
+    int         pad_x           = info.pad_stride_info().pad().first;
+    int         pad_y           = info.pad_stride_info().pad().second;
+    bool        exclude_padding = info.exclude_padding();
 
     const auto w_src      = static_cast<int>(src.shape()[0]);
     const auto h_src      = static_cast<int>(src.shape()[1]);
@@ -122,6 +127,11 @@
                     hstart     = std::max(hstart, 0);
                     wend       = std::min(wend, w_src);
                     hend       = std::min(hend, h_src);
+                    // Exclude padding pixels from the average
+                    if(exclude_padding)
+                    {
+                        pool = (hend - hstart) * (wend - wstart);
+                    }
 
                     if(type == PoolingType::AVG)
                     {
@@ -157,12 +167,15 @@
 template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type>
 SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, PoolingLayerInfo info)
 {
-    const int   pool_size     = info.pool_size();
-    PoolingType type          = info.pool_type();
-    int         pool_stride_x = info.pad_stride_info().stride().first;
-    int         pool_stride_y = info.pad_stride_info().stride().second;
-    int         pad_x         = info.pad_stride_info().pad().first;
-    int         pad_y         = info.pad_stride_info().pad().second;
+    ARM_COMPUTE_ERROR_ON(info.is_global_pooling() && (src.shape().x() != src.shape().y()));
+
+    const int   pool_size       = info.is_global_pooling() ? src.shape().x() : info.pool_size();
+    PoolingType type            = info.pool_type();
+    int         pool_stride_x   = info.pad_stride_info().stride().first;
+    int         pool_stride_y   = info.pad_stride_info().stride().second;
+    int         pad_x           = info.pad_stride_info().pad().first;
+    int         pad_y           = info.pad_stride_info().pad().second;
+    bool        exclude_padding = info.exclude_padding();
 
     const auto w_src      = static_cast<int>(src.shape()[0]);
     const auto h_src      = static_cast<int>(src.shape()[1]);
@@ -224,6 +237,11 @@
                     hstart     = std::max(hstart, 0);
                     wend       = std::min(wend, w_src);
                     hend       = std::min(hend, h_src);
+                    // Exclude padding pixels from the average
+                    if(exclude_padding)
+                    {
+                        pool = (hend - hstart) * (wend - wstart);
+                    }
 
                     using namespace fixed_point_arithmetic;
 
@@ -265,6 +283,15 @@
     return dst;
 }
 
+template <>
+SimpleTensor<uint8_t> pooling_layer<uint8_t>(const SimpleTensor<uint8_t> &src, PoolingLayerInfo info)
+{
+    SimpleTensor<float>   src_tmp = convert_from_asymmetric(src);
+    SimpleTensor<float>   dst_tmp = pooling_layer<float>(src_tmp, info);
+    SimpleTensor<uint8_t> dst     = convert_to_asymmetric(dst_tmp, src.quantization_info());
+    return dst;
+}
+
 template SimpleTensor<float> pooling_layer(const SimpleTensor<float> &src, PoolingLayerInfo info);
 template SimpleTensor<half> pooling_layer(const SimpleTensor<half> &src, PoolingLayerInfo info);
 template SimpleTensor<qint8_t> pooling_layer(const SimpleTensor<qint8_t> &src, PoolingLayerInfo info);
diff --git a/tests/validation/CPP/PoolingLayer.h b/tests/validation/reference/PoolingLayer.h
similarity index 100%
rename from tests/validation/CPP/PoolingLayer.h
rename to tests/validation/reference/PoolingLayer.h
diff --git a/tests/validation/CPP/QuantizationLayer.cpp b/tests/validation/reference/QuantizationLayer.cpp
similarity index 100%
rename from tests/validation/CPP/QuantizationLayer.cpp
rename to tests/validation/reference/QuantizationLayer.cpp
diff --git a/tests/validation/CPP/QuantizationLayer.h b/tests/validation/reference/QuantizationLayer.h
similarity index 100%
rename from tests/validation/CPP/QuantizationLayer.h
rename to tests/validation/reference/QuantizationLayer.h
diff --git a/tests/validation/CPP/ReductionOperation.cpp b/tests/validation/reference/ReductionOperation.cpp
similarity index 100%
rename from tests/validation/CPP/ReductionOperation.cpp
rename to tests/validation/reference/ReductionOperation.cpp
diff --git a/tests/validation/CPP/ReductionOperation.h b/tests/validation/reference/ReductionOperation.h
similarity index 100%
rename from tests/validation/CPP/ReductionOperation.h
rename to tests/validation/reference/ReductionOperation.h
diff --git a/tests/validation/reference/Remap.cpp b/tests/validation/reference/Remap.cpp
new file mode 100644
index 0000000..bef5962
--- /dev/null
+++ b/tests/validation/reference/Remap.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Remap.h"
+
+#include "Utils.h"
+#include "tests/validation/Helpers.h"
+
+#include <algorithm>
+#include <array>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> remap(const SimpleTensor<T> &in, SimpleTensor<float> &map_x, SimpleTensor<float> &map_y, SimpleTensor<T> &valid_mask, InterpolationPolicy policy, BorderMode border_mode,
+                      T constant_border_value)
+{
+    ARM_COMPUTE_ERROR_ON_MSG(border_mode == BorderMode::REPLICATE, "BorderMode not supported");
+
+    SimpleTensor<T> out(in.shape(), in.data_type());
+
+    const int width  = in.shape().x();
+    const int height = in.shape().y();
+
+    for(int idx = 0; idx < out.num_elements(); idx++)
+    {
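+        // Assume the pixel is valid; the flag is cleared when the mapped coordinate cannot be interpolated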
+        valid_mask[idx] = 1;
+        Coordinates src_idx;
+        src_idx.set(0, static_cast<int>(std::floor(map_x[idx])));
+        src_idx.set(1, static_cast<int>(std::floor(map_y[idx])));
+        if((0 <= map_y[idx]) && (map_y[idx] < height) && (0 <= map_x[idx]) && (map_x[idx] < width))
+        {
+            switch(policy)
+            {
+                case InterpolationPolicy::NEAREST_NEIGHBOR:
+                    out[idx] = tensor_elem_at(in, src_idx, border_mode, constant_border_value);
+                    break;
+                case InterpolationPolicy::BILINEAR:
+                    (valid_bilinear_policy(map_x[idx], map_y[idx], width, height, border_mode)) ? out[idx] = bilinear_policy(in, src_idx, map_x[idx], map_y[idx], border_mode, constant_border_value) : valid_mask[idx] = 0;
+                    break;
+                case InterpolationPolicy::AREA:
+                default:
+                    ARM_COMPUTE_ERROR("Interpolation not supported");
+                    break;
+            }
+        }
+        else
+        {
+            if(border_mode == BorderMode::UNDEFINED)
+            {
+                valid_mask[idx] = 0;
+            }
+            else
+            {
+                switch(policy)
+                {
+                    case InterpolationPolicy::NEAREST_NEIGHBOR:
+                        out[idx] = constant_border_value;
+                        break;
+                    case InterpolationPolicy::BILINEAR:
+                        out[idx] = bilinear_policy(in, src_idx, map_x[idx], map_y[idx], border_mode, constant_border_value);
+                        break;
+                    case InterpolationPolicy::AREA:
+                    default:
+                        break;
+                }
+            }
+        }
+    }
+
+    return out;
+}
+
+template SimpleTensor<uint8_t> remap(const SimpleTensor<uint8_t> &src, SimpleTensor<float> &map_x, SimpleTensor<float> &map_y, SimpleTensor<uint8_t> &valid_mask, InterpolationPolicy policy,
+                                     BorderMode border_mode,
+                                     uint8_t    constant_border_value);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CPP/Scale.h b/tests/validation/reference/Remap.h
similarity index 79%
copy from tests/validation/CPP/Scale.h
copy to tests/validation/reference/Remap.h
index 53183ae..918ba95 100644
--- a/tests/validation/CPP/Scale.h
+++ b/tests/validation/reference/Remap.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_SCALE_H__
-#define __ARM_COMPUTE_TEST_SCALE_H__
+#ifndef __ARM_COMPUTE_TEST_REMAP_H__
+#define __ARM_COMPUTE_TEST_REMAP_H__
 
 #include "tests/SimpleTensor.h"
 
@@ -35,9 +35,10 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> scale(const SimpleTensor<T> &in, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value = 0);
+SimpleTensor<T> remap(const SimpleTensor<T> &in, SimpleTensor<float> &map_x, SimpleTensor<float> &map_y, SimpleTensor<T> &valid_mask, InterpolationPolicy policy, BorderMode border_mode,
+                      T constant_border_value = 0);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_SCALE_H__ */
+#endif /* __ARM_COMPUTE_TEST_REMAP_H__ */
diff --git a/tests/validation/CPP/ReshapeLayer.cpp b/tests/validation/reference/ReshapeLayer.cpp
similarity index 100%
rename from tests/validation/CPP/ReshapeLayer.cpp
rename to tests/validation/reference/ReshapeLayer.cpp
diff --git a/tests/validation/CPP/ReshapeLayer.h b/tests/validation/reference/ReshapeLayer.h
similarity index 100%
rename from tests/validation/CPP/ReshapeLayer.h
rename to tests/validation/reference/ReshapeLayer.h
diff --git a/tests/validation/CPP/Scale.cpp b/tests/validation/reference/Scale.cpp
similarity index 70%
rename from tests/validation/CPP/Scale.cpp
rename to tests/validation/reference/Scale.cpp
index 74489aa..727325f 100644
--- a/tests/validation/CPP/Scale.cpp
+++ b/tests/validation/reference/Scale.cpp
@@ -26,6 +26,7 @@
 
 #include "Scale.h"
 #include "Utils.h"
+#include "support/ToolchainSupport.h"
 
 namespace arm_compute
 {
@@ -36,11 +37,14 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> scale(const SimpleTensor<T> &in, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value)
+SimpleTensor<T> scale(const SimpleTensor<T> &in, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value,
+                      SamplingPolicy sampling_policy, bool ceil_policy_scale)
 {
-    TensorShape shape_scaled(in.shape());
-    shape_scaled.set(0, in.shape()[0] * scale_x);
-    shape_scaled.set(1, in.shape()[1] * scale_y);
+    // Add 1 to the source dimensions before scaling when ceil_policy_scale is true
+    const size_t round_value = ceil_policy_scale ? 1U : 0U;
+    TensorShape  shape_scaled(in.shape());
+    shape_scaled.set(0, (in.shape()[0] + round_value) * scale_x);
+    shape_scaled.set(1, (in.shape()[1] + round_value) * scale_y);
     SimpleTensor<T> out(shape_scaled, in.data_type());
 
     // Compute the ratio between source width/height and destination width/height
@@ -50,6 +54,9 @@
     const auto width  = static_cast<int>(in.shape().x());
     const auto height = static_cast<int>(in.shape().y());
 
+    // Determine border size
+    const int border_size = (border_mode == BorderMode::UNDEFINED) ? 0 : 1;
+
     // Area interpolation behaves as Nearest Neighbour in case of up-sampling
     if(policy == InterpolationPolicy::AREA && wr <= 1.f && hr <= 1.f)
     {
@@ -61,8 +68,23 @@
         Coordinates id    = index2coord(out.shape(), element_idx);
         int         idx   = id.x();
         int         idy   = id.y();
-        float       x_src = (idx + 0.5f) * wr - 0.5f;
-        float       y_src = (idy + 0.5f) * hr - 0.5f;
+        float       x_src = 0;
+        float       y_src = 0;
+
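+        // Map the destination pixel back to a source coordinate according to the sampling policy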
+        switch(sampling_policy)
+        {
+            case SamplingPolicy::TOP_LEFT:
+                x_src = idx * wr;
+                y_src = idy * hr;
+                break;
+            case SamplingPolicy::CENTER:
+                x_src = (idx + 0.5f) * wr - 0.5f;
+                y_src = (idy + 0.5f) * hr - 0.5f;
+                break;
+            default:
+                ARM_COMPUTE_ERROR("Unsupported sampling policy.");
+                break;
+        }
 
         switch(policy)
         {
@@ -75,30 +97,17 @@
                 id.set(1, y_src);
 
                 // If coordinates in range of tensor's width or height
-                if(x_src >= -1 || y_src >= -1 || x_src <= width || y_src <= height)
+                if(is_valid_pixel_index(x_src, y_src, width, height, border_size))
                 {
                     out[element_idx] = tensor_elem_at(in, id, border_mode, constant_border_value);
                 }
-                else
-                {
-                    if(border_mode == BorderMode::CONSTANT)
-                    {
-                        out[element_idx] = constant_border_value;
-                    }
-                    else if(border_mode == BorderMode::REPLICATE)
-                    {
-                        id.set(0, clamp(static_cast<int>(x_src), 0, width - 1));
-                        id.set(1, clamp(static_cast<int>(y_src), 0, height - 1));
-                        out[element_idx] = in[coord2index(in.shape(), id)];
-                    }
-                }
                 break;
             }
             case InterpolationPolicy::BILINEAR:
             {
                 id.set(0, std::floor(x_src));
                 id.set(1, std::floor(y_src));
-                if(x_src >= -1 || y_src >= -1 || x_src <= width || y_src <= height)
+                if(is_valid_pixel_index(x_src, y_src, width, height, border_size))
                 {
                     out[element_idx] = bilinear_policy(in, id, x_src, y_src, border_mode, constant_border_value);
                 }
@@ -127,14 +136,14 @@
                 const int yi     = std::floor(y_src);
 
                 // Clamp position to borders
-                x_src = std::max(-1.f, std::min(x_src, static_cast<float>(width)));
-                y_src = std::max(-1.f, std::min(y_src, static_cast<float>(height)));
+                x_src = std::max(-static_cast<float>(border_size), std::min(x_src, static_cast<float>(width - 1 + border_size)));
+                y_src = std::max(-static_cast<float>(border_size), std::min(y_src, static_cast<float>(height - 1 + border_size)));
 
                 // Clamp bounding box offsets to borders
-                x_from = ((x_src + x_from) < -1) ? -1 : x_from;
-                y_from = ((y_src + y_from) < -1) ? -1 : y_from;
-                x_to   = ((x_src + x_to) > width) ? (width - x_src) : x_to;
-                y_to   = ((y_src + y_to) > height) ? (height - y_src) : y_to;
+                x_from = ((x_src + x_from) < -border_size) ? -border_size : x_from;
+                y_from = ((y_src + y_from) < -border_size) ? -border_size : y_from;
+                x_to   = ((x_src + x_to) >= (width + border_size)) ? (width - 1 + border_size) : x_to;
+                y_to   = ((y_src + y_to) >= (height + border_size)) ? (height - 1 + border_size) : y_to;
                 ARM_COMPUTE_ERROR_ON((x_to - x_from + 1) == 0 || (y_to - y_from + 1) == 0);
 
                 float sum = 0;
@@ -159,10 +168,14 @@
     return out;
 }
 
-template SimpleTensor<uint8_t> scale(const SimpleTensor<uint8_t> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value);
-template SimpleTensor<int16_t> scale(const SimpleTensor<int16_t> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, int16_t constant_border_value);
-template SimpleTensor<half> scale(const SimpleTensor<half> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, half constant_border_value);
-template SimpleTensor<float> scale(const SimpleTensor<float> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, float constant_border_value);
+template SimpleTensor<uint8_t> scale(const SimpleTensor<uint8_t> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value,
+                                     SamplingPolicy sampling_policy, bool ceil_policy_scale);
+template SimpleTensor<int16_t> scale(const SimpleTensor<int16_t> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, int16_t constant_border_value,
+                                     SamplingPolicy sampling_policy, bool ceil_policy_scale);
+template SimpleTensor<half> scale(const SimpleTensor<half> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, half constant_border_value,
+                                  SamplingPolicy sampling_policy, bool ceil_policy_scale);
+template SimpleTensor<float> scale(const SimpleTensor<float> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, float constant_border_value,
+                                   SamplingPolicy sampling_policy, bool ceil_policy_scale);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/CPP/Scale.h b/tests/validation/reference/Scale.h
similarity index 91%
rename from tests/validation/CPP/Scale.h
rename to tests/validation/reference/Scale.h
index 53183ae..566e30a 100644
--- a/tests/validation/CPP/Scale.h
+++ b/tests/validation/reference/Scale.h
@@ -35,7 +35,8 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> scale(const SimpleTensor<T> &in, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value = 0);
+SimpleTensor<T> scale(const SimpleTensor<T> &in, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value = 0,
+                      SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool ceil_policy_scale = false);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/Scharr.cpp b/tests/validation/reference/Scharr.cpp
new file mode 100644
index 0000000..98e4d62
--- /dev/null
+++ b/tests/validation/reference/Scharr.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Scharr.h"
+
+#include "Utils.h"
+#include "tests/validation/Helpers.h"
+
+#include <array>
+#include <map>
+#include <utility>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+const std::array<int8_t, 9> scharr_3_x{ { -3, 0, 3, -10, 0, 10, -3, 0, 3 } };
+const std::array<int8_t, 9> scharr_3_y{ { -3, -10, -3, 0, 0, 0, 3, 10, 3 } };
+
+const std::map<int, std::pair<const int8_t *, const int8_t *>> masks
+{
+    { 3, { scharr_3_x.data(), scharr_3_y.data() } }
+};
+
+template <typename T>
+struct data_type;
+
+template <>
+struct data_type<int16_t>
+{
+    const static DataType value = DataType::S16;
+};
+} // namespace
+
+template <typename T, typename U>
+std::pair<SimpleTensor<T>, SimpleTensor<T>> scharr(const SimpleTensor<U> &src, int filter_size, BorderMode border_mode, uint8_t constant_border_value, GradientDimension gradient_dimension)
+{
+    const auto shape_size = static_cast<unsigned int>(filter_size);
+
+    SimpleTensor<T> dst_x(src.shape(), data_type<T>::value, src.num_channels());
+    SimpleTensor<T> dst_y(src.shape(), data_type<T>::value, src.num_channels());
+
+    ValidRegion valid_region = shape_to_valid_region(src.shape(), border_mode == BorderMode::UNDEFINED, BorderSize(filter_size / 2));
+
+    for(int i = 0; i < src.num_elements(); ++i)
+    {
+        Coordinates coord = index2coord(src.shape(), i);
+
+        if(!is_in_valid_region(valid_region, coord))
+        {
+            continue;
+        }
+
+        switch(gradient_dimension)
+        {
+            case GradientDimension::GRAD_X:
+                apply_2d_spatial_filter(coord, src, dst_x, TensorShape{ shape_size, shape_size }, masks.at(filter_size).first, 1.f, border_mode, constant_border_value);
+                break;
+            case GradientDimension::GRAD_Y:
+                apply_2d_spatial_filter(coord, src, dst_y, TensorShape{ shape_size, shape_size }, masks.at(filter_size).second, 1.f, border_mode, constant_border_value);
+                break;
+            case GradientDimension::GRAD_XY:
+                apply_2d_spatial_filter(coord, src, dst_x, TensorShape{ shape_size, shape_size }, masks.at(filter_size).first, 1.f, border_mode, constant_border_value);
+                apply_2d_spatial_filter(coord, src, dst_y, TensorShape{ shape_size, shape_size }, masks.at(filter_size).second, 1.f, border_mode, constant_border_value);
+                break;
+            default:
+                ARM_COMPUTE_ERROR("Gradient dimension not supported");
+        }
+    }
+
+    return std::make_pair(dst_x, dst_y);
+}
+
+template std::pair<SimpleTensor<int16_t>, SimpleTensor<int16_t>> scharr(const SimpleTensor<uint8_t> &src, int filter_size, BorderMode border_mode, uint8_t constant_border_value,
+                                                                        GradientDimension gradient_dimension);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CPP/Sobel.h b/tests/validation/reference/Scharr.h
similarity index 80%
copy from tests/validation/CPP/Sobel.h
copy to tests/validation/reference/Scharr.h
index ab04663..8f38381 100644
--- a/tests/validation/CPP/Sobel.h
+++ b/tests/validation/reference/Scharr.h
@@ -21,11 +21,11 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_SOBEL_H__
-#define __ARM_COMPUTE_TEST_SOBEL_H__
+#ifndef __ARM_COMPUTE_TEST_SCHARR_H__
+#define __ARM_COMPUTE_TEST_SCHARR_H__
 
-#include "arm_compute/core/Types.h"
 #include "tests/SimpleTensor.h"
+#include "tests/Types.h"
 
 namespace arm_compute
 {
@@ -36,9 +36,9 @@
 namespace reference
 {
 template <typename T, typename U>
-std::pair<SimpleTensor<T>, SimpleTensor<T>> sobel(const SimpleTensor<U> &src, int filter_size, BorderMode border_mode, uint8_t constant_border_value = 0);
+std::pair<SimpleTensor<T>, SimpleTensor<T>> scharr(const SimpleTensor<U> &src, int filter_size, BorderMode border_mode, uint8_t constant_border_value, GradientDimension gradient_dimension);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_SOBEL_H__ */
+#endif /* __ARM_COMPUTE_TEST_SCHARR_H__ */
diff --git a/tests/validation/CPP/Sobel.cpp b/tests/validation/reference/Sobel.cpp
similarity index 69%
rename from tests/validation/CPP/Sobel.cpp
rename to tests/validation/reference/Sobel.cpp
index 314fbd4..ff0e11d 100644
--- a/tests/validation/CPP/Sobel.cpp
+++ b/tests/validation/reference/Sobel.cpp
@@ -103,7 +103,7 @@
 } // namespace
 
 template <typename T, typename U>
-std::pair<SimpleTensor<T>, SimpleTensor<T>> sobel(const SimpleTensor<U> &src, int filter_size, BorderMode border_mode, uint8_t constant_border_value)
+std::pair<SimpleTensor<T>, SimpleTensor<T>> sobel(const SimpleTensor<U> &src, int filter_size, BorderMode border_mode, uint8_t constant_border_value, GradientDimension gradient_dimension)
 {
     SimpleTensor<T> dst_x(src.shape(), data_type<T>::value, src.num_channels());
     SimpleTensor<T> dst_y(src.shape(), data_type<T>::value, src.num_channels());
@@ -118,18 +118,34 @@
         {
             continue;
         }
-
-        apply_2d_spatial_filter(coord, src, dst_x, TensorShape{ static_cast<unsigned int>(filter_size), static_cast<unsigned int>(filter_size) }, masks.at(filter_size).first, 1.f, border_mode,
-                                constant_border_value);
-        apply_2d_spatial_filter(coord, src, dst_y, TensorShape{ static_cast<unsigned int>(filter_size), static_cast<unsigned int>(filter_size) }, masks.at(filter_size).second, 1.f, border_mode,
-                                constant_border_value);
+        switch(gradient_dimension)
+        {
+            case GradientDimension::GRAD_X:
+                apply_2d_spatial_filter(coord, src, dst_x, TensorShape{ static_cast<unsigned int>(filter_size), static_cast<unsigned int>(filter_size) }, masks.at(filter_size).first, 1.f, border_mode,
+                                        constant_border_value);
+                break;
+            case GradientDimension::GRAD_Y:
+                apply_2d_spatial_filter(coord, src, dst_y, TensorShape{ static_cast<unsigned int>(filter_size), static_cast<unsigned int>(filter_size) }, masks.at(filter_size).second, 1.f, border_mode,
+                                        constant_border_value);
+                break;
+            case GradientDimension::GRAD_XY:
+                apply_2d_spatial_filter(coord, src, dst_x, TensorShape{ static_cast<unsigned int>(filter_size), static_cast<unsigned int>(filter_size) }, masks.at(filter_size).first, 1.f, border_mode,
+                                        constant_border_value);
+                apply_2d_spatial_filter(coord, src, dst_y, TensorShape{ static_cast<unsigned int>(filter_size), static_cast<unsigned int>(filter_size) }, masks.at(filter_size).second, 1.f, border_mode,
+                                        constant_border_value);
+                break;
+            default:
+                ARM_COMPUTE_ERROR("Gradient dimension not supported");
+        }
     }
 
     return std::make_pair(dst_x, dst_y);
 }
 
-template std::pair<SimpleTensor<int16_t>, SimpleTensor<int16_t>> sobel(const SimpleTensor<uint8_t> &src, int filter_size, BorderMode border_mode, uint8_t constant_border_value);
-template std::pair<SimpleTensor<int>, SimpleTensor<int>> sobel(const SimpleTensor<uint8_t> &src, int filter_size, BorderMode border_mode, uint8_t constant_border_value);
+template std::pair<SimpleTensor<int16_t>, SimpleTensor<int16_t>> sobel(const SimpleTensor<uint8_t> &src, int filter_size, BorderMode border_mode, uint8_t constant_border_value,
+                                                                       GradientDimension gradient_dimension);
+template std::pair<SimpleTensor<int>, SimpleTensor<int>> sobel(const SimpleTensor<uint8_t> &src, int filter_size, BorderMode border_mode, uint8_t constant_border_value,
+                                                               GradientDimension gradient_dimension);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/CPP/Sobel.h b/tests/validation/reference/Sobel.h
similarity index 94%
rename from tests/validation/CPP/Sobel.h
rename to tests/validation/reference/Sobel.h
index ab04663..9099f1a 100644
--- a/tests/validation/CPP/Sobel.h
+++ b/tests/validation/reference/Sobel.h
@@ -26,6 +26,7 @@
 
 #include "arm_compute/core/Types.h"
 #include "tests/SimpleTensor.h"
+#include "tests/Types.h"
 
 namespace arm_compute
 {
@@ -36,7 +37,7 @@
 namespace reference
 {
 template <typename T, typename U>
-std::pair<SimpleTensor<T>, SimpleTensor<T>> sobel(const SimpleTensor<U> &src, int filter_size, BorderMode border_mode, uint8_t constant_border_value = 0);
+std::pair<SimpleTensor<T>, SimpleTensor<T>> sobel(const SimpleTensor<U> &src, int filter_size, BorderMode border_mode, uint8_t constant_border_value, GradientDimension gradient_dimension);
 } // namespace reference
 } // namespace validation
 } // namespace test
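
Each GRAD_XY iteration above applies both the X and the Y mask to the current pixel's
neighbourhood via apply_2d_spatial_filter, while GRAD_X and GRAD_Y fill only the corresponding
output; the masks themselves are looked up from the library's own tables via masks.at(filter_size).
As a standalone illustration of the arithmetic for the 3x3 case, using the standard Sobel masks
and a made-up neighbourhood containing a vertical edge:

    #include <array>
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        const std::array<int, 9> gx = { -1, 0, 1, -2, 0, 2, -1, 0, 1 }; // derivative along x
        const std::array<int, 9> gy = { -1, -2, -1, 0, 0, 0, 1, 2, 1 }; // derivative along y

        // 3x3 neighbourhood (row-major) with a vertical edge in its right column.
        const std::array<uint8_t, 9> pixels = { 10, 10, 50, 10, 10, 50, 10, 10, 50 };

        int32_t sum_x = 0;
        int32_t sum_y = 0;
        for(int i = 0; i < 9; ++i)
        {
            sum_x += gx[i] * pixels[i];
            sum_y += gy[i] * pixels[i];
        }
        std::printf("dst_x = %d, dst_y = %d\n", sum_x, sum_y); // dst_x = 160, dst_y = 0
    }
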
diff --git a/tests/validation/CPP/SoftmaxLayer.cpp b/tests/validation/reference/SoftmaxLayer.cpp
similarity index 82%
rename from tests/validation/CPP/SoftmaxLayer.cpp
rename to tests/validation/reference/SoftmaxLayer.cpp
index eb76550..90b9b1f 100644
--- a/tests/validation/CPP/SoftmaxLayer.cpp
+++ b/tests/validation/reference/SoftmaxLayer.cpp
@@ -35,7 +35,7 @@
 namespace reference
 {
 template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type>
-SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src)
+SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src, float beta)
 {
     // Create reference
     SimpleTensor<T> dst{ src.shape(), src.data_type(), 1, src.fixed_point_position() };
@@ -54,9 +54,9 @@
 
         // Regularize
         T sum(0.f);
-        std::transform(src_row_ptr, src_row_ptr + cols, dst_row_ptr, [&sum, max](T val)
+        std::transform(src_row_ptr, src_row_ptr + cols, dst_row_ptr, [&sum, max, beta](T val)
         {
-            const T res(std::exp(val - max));
+            const T res(std::exp((val - max) * beta));
             sum += res;
             return res;
         });
@@ -72,8 +72,10 @@
 }
 
 template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type>
-SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src)
+SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src, float beta)
 {
+    ARM_COMPUTE_UNUSED(beta);
+
     using namespace fixed_point_arithmetic;
 
     // Create reference
@@ -112,10 +114,22 @@
     return dst;
 }
 
-template SimpleTensor<float> softmax_layer(const SimpleTensor<float> &src);
-template SimpleTensor<half> softmax_layer(const SimpleTensor<half> &src);
-template SimpleTensor<qint8_t> softmax_layer(const SimpleTensor<qint8_t> &src);
-template SimpleTensor<qint16_t> softmax_layer(const SimpleTensor<qint16_t> &src);
+template <>
+SimpleTensor<uint8_t> softmax_layer<uint8_t>(const SimpleTensor<uint8_t> &src, float beta)
+{
+    // Note: Output quantization info should always have scale = 1/256 and offset = 0, so the softmax output range [0, 1) maps onto the full uint8 range
+    const QuantizationInfo output_quantization_info = QuantizationInfo(1.f / 256, 0);
+
+    SimpleTensor<float>   src_tmp = convert_from_asymmetric(src);
+    SimpleTensor<float>   dst_tmp = softmax_layer<float>(src_tmp, beta);
+    SimpleTensor<uint8_t> dst     = convert_to_asymmetric(dst_tmp, output_quantization_info);
+    return dst;
+}
+
+template SimpleTensor<float> softmax_layer(const SimpleTensor<float> &src, float beta);
+template SimpleTensor<half> softmax_layer(const SimpleTensor<half> &src, float beta);
+template SimpleTensor<qint8_t> softmax_layer(const SimpleTensor<qint8_t> &src, float beta);
+template SimpleTensor<qint16_t> softmax_layer(const SimpleTensor<qint16_t> &src, float beta);
 } // namespace reference
 } // namespace validation
 } // namespace test
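
The new beta parameter scales the max-subtracted logits before exponentiation, i.e.
softmax(x)_i = exp(beta * (x_i - max)) / sum_j exp(beta * (x_j - max)), so beta = 1 reproduces the
previous behaviour; the fixed-point overload ignores beta for now. A standalone row-wise sketch of
the same computation on a plain std::vector instead of a SimpleTensor:

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Row-wise softmax with a beta scaling factor, mirroring the float reference.
    std::vector<float> softmax_row(const std::vector<float> &row, float beta)
    {
        const float max = *std::max_element(row.begin(), row.end());

        std::vector<float> out(row.size());
        float sum = 0.f;
        for(std::size_t i = 0; i < row.size(); ++i)
        {
            out[i] = std::exp((row[i] - max) * beta); // subtracting max keeps exp() from overflowing
            sum += out[i];
        }
        for(float &v : out)
        {
            v /= sum;
        }
        return out;
    }

    int main()
    {
        const std::vector<float> logits = { 1.f, 2.f, 3.f };
        for(float v : softmax_row(logits, 2.f)) // beta = 2 sharpens the distribution
        {
            std::printf("%f ", v);
        }
        std::printf("\n");
    }
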
diff --git a/tests/validation/CPP/SoftmaxLayer.h b/tests/validation/reference/SoftmaxLayer.h
similarity index 92%
rename from tests/validation/CPP/SoftmaxLayer.h
rename to tests/validation/reference/SoftmaxLayer.h
index ab79bc4..a6d4c3b 100644
--- a/tests/validation/CPP/SoftmaxLayer.h
+++ b/tests/validation/reference/SoftmaxLayer.h
@@ -36,10 +36,10 @@
 namespace reference
 {
 template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0>
-SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src);
+SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src, float beta);
 
 template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
-SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src);
+SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src, float beta);
 } // namespace reference
 } // namespace validation
 } // namespace test
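
The uint8_t specialization in SoftmaxLayer.cpp dequantizes the input, reuses the float reference,
and requantizes with a fixed QuantizationInfo(1.f / 256, 0) so that results in [0, 1) cover the
whole uint8 range. A minimal sketch of the behaviour the convert_from_asymmetric /
convert_to_asymmetric helpers are assumed to have; the function names below are illustrative, not
the library's API:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Assumed element-wise behaviour of asymmetric (de)quantization.
    float dequantize_qasymm8(uint8_t q, float scale, int offset)
    {
        return scale * (static_cast<int>(q) - offset);
    }

    uint8_t quantize_qasymm8(float v, float scale, int offset)
    {
        const int q = static_cast<int>(std::round(v / scale)) + offset;
        return static_cast<uint8_t>(std::min(255, std::max(0, q)));
    }

    // With scale = 1/256 and offset = 0, a softmax value of 0.5f quantizes to 128,
    // and the largest representable output is 255/256.
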
diff --git a/tests/validation/CPP/TableLookup.cpp b/tests/validation/reference/TableLookup.cpp
similarity index 100%
rename from tests/validation/CPP/TableLookup.cpp
rename to tests/validation/reference/TableLookup.cpp
diff --git a/tests/validation/CPP/TableLookup.h b/tests/validation/reference/TableLookup.h
similarity index 100%
rename from tests/validation/CPP/TableLookup.h
rename to tests/validation/reference/TableLookup.h
diff --git a/tests/validation/CPP/Threshold.cpp b/tests/validation/reference/Threshold.cpp
similarity index 100%
rename from tests/validation/CPP/Threshold.cpp
rename to tests/validation/reference/Threshold.cpp
diff --git a/tests/validation/CPP/Threshold.h b/tests/validation/reference/Threshold.h
similarity index 100%
rename from tests/validation/CPP/Threshold.h
rename to tests/validation/reference/Threshold.h
diff --git a/tests/validation/reference/Transpose.cpp b/tests/validation/reference/Transpose.cpp
new file mode 100644
index 0000000..736f37e
--- /dev/null
+++ b/tests/validation/reference/Transpose.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Transpose.h"
+
+#include "arm_compute/core/Types.h"
+#include "tests/validation/FixedPoint.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T>
+SimpleTensor<T> transpose(const SimpleTensor<T> &src)
+{
+    // Output shape: the original shape with its first two dimensions swapped
+    TensorShape dst_shape{ src.shape().y(), src.shape().x() };
+
+    // Create reference
+    SimpleTensor<T> dst{ dst_shape, src.data_type() };
+
+    // Compute reference
+    for(int i = 0; i < src.num_elements(); ++i)
+    {
+        const Coordinates coord = index2coord(src.shape(), i);
+        const Coordinates dst_coord{ coord.y(), coord.x() };
+        const size_t      dst_index = coord2index(dst.shape(), dst_coord);
+
+        dst[dst_index] = src[i];
+    }
+
+    return dst;
+}
+
+template SimpleTensor<uint8_t> transpose(const SimpleTensor<uint8_t> &src);
+template SimpleTensor<uint16_t> transpose(const SimpleTensor<uint16_t> &src);
+template SimpleTensor<uint32_t> transpose(const SimpleTensor<uint32_t> &src);
+template SimpleTensor<half> transpose(const SimpleTensor<half> &src);
+template SimpleTensor<float> transpose(const SimpleTensor<float> &src);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
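
The reference walks the input linearly, recovers each element's (x, y) coordinates with
index2coord, and stores the value at the swapped coordinate of the transposed shape. The same
index mapping on a raw row-major buffer, as a self-contained sketch:

    #include <cstdio>
    #include <vector>

    // Row-major 2D transpose over a flat buffer, mirroring the index2coord / coord2index
    // round trip used by the reference.
    template <typename T>
    std::vector<T> transpose2d(const std::vector<T> &src, int width, int height)
    {
        std::vector<T> dst(src.size());
        for(int y = 0; y < height; ++y)
        {
            for(int x = 0; x < width; ++x)
            {
                // Element (x, y) of a width x height tensor lands at (y, x) of the
                // height x width output.
                dst[x * height + y] = src[y * width + x];
            }
        }
        return dst;
    }

    int main()
    {
        // 3 (width) x 2 (height) input: rows {1, 2, 3} and {4, 5, 6}.
        const std::vector<int> src = { 1, 2, 3, 4, 5, 6 };
        for(int v : transpose2d(src, 3, 2)) // prints 1 4 2 5 3 6
        {
            std::printf("%d ", v);
        }
        std::printf("\n");
    }
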
diff --git a/tests/validation/CPP/Scale.h b/tests/validation/reference/Transpose.h
similarity index 82%
copy from tests/validation/CPP/Scale.h
copy to tests/validation/reference/Transpose.h
index 53183ae..3f42f41 100644
--- a/tests/validation/CPP/Scale.h
+++ b/tests/validation/reference/Transpose.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef __ARM_COMPUTE_TEST_SCALE_H__
-#define __ARM_COMPUTE_TEST_SCALE_H__
+#ifndef __ARM_COMPUTE_TEST_TRANSPOSE_H__
+#define __ARM_COMPUTE_TEST_TRANSPOSE_H__
 
 #include "tests/SimpleTensor.h"
 
@@ -35,9 +35,9 @@
 namespace reference
 {
 template <typename T>
-SimpleTensor<T> scale(const SimpleTensor<T> &in, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value = 0);
+SimpleTensor<T> transpose(const SimpleTensor<T> &src);
 } // namespace reference
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_SCALE_H__ */
+#endif /* __ARM_COMPUTE_TEST_TRANSPOSE_H__ */
diff --git a/tests/validation/CPP/Utils.cpp b/tests/validation/reference/Utils.cpp
similarity index 100%
rename from tests/validation/CPP/Utils.cpp
rename to tests/validation/reference/Utils.cpp
diff --git a/tests/validation/CPP/Utils.h b/tests/validation/reference/Utils.h
similarity index 89%
rename from tests/validation/CPP/Utils.h
rename to tests/validation/reference/Utils.h
index 0733411..2aa77c6 100644
--- a/tests/validation/CPP/Utils.h
+++ b/tests/validation/reference/Utils.h
@@ -41,6 +41,21 @@
 {
 namespace validation
 {
+/** Checks if a pixel with the given coordinates lies inside the image, including its border
+ *
+ * @param x            X coordinate
+ * @param y            Y coordinate
+ * @param width        Width of the image
+ * @param height       Height of the image
+ * @param border_size  Border size
+ *
+ * @return True if the pixel lies within the image extended by @p border_size, false otherwise
+ */
+inline bool is_valid_pixel_index(int x, int y, int width, int height, int border_size)
+{
+    return ((x >= -border_size) && (y >= -border_size) && (x < (width + border_size)) && (y < (height + border_size)));
+}
+
 // Return a tensor element at a specified coordinate with different border modes
 template <typename T>
 T tensor_elem_at(const SimpleTensor<T> &src, Coordinates coord, BorderMode border_mode, T constant_border_value)
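
is_valid_pixel_index extends the usual bounds check by border_size pixels on every side, so a
reference filter can tell whether a neighbouring coordinate is readable at all before fetching it
with tensor_elem_at. A self-contained sketch, with the helper copied in for illustration:

    #include <cstdio>

    inline bool is_valid_pixel_index(int x, int y, int width, int height, int border_size)
    {
        return ((x >= -border_size) && (y >= -border_size) && (x < (width + border_size)) && (y < (height + border_size)));
    }

    int main()
    {
        const int width = 16, height = 16, border = 1;
        std::printf("%d\n", is_valid_pixel_index(-1, 0, width, height, border)); // 1: inside the 1-pixel border
        std::printf("%d\n", is_valid_pixel_index(-2, 0, width, height, border)); // 0: beyond the border
    }
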
diff --git a/tests/validation/reference/UtilsQuantizedAsymm.h b/tests/validation/reference/UtilsQuantizedAsymm.h
new file mode 100644
index 0000000..b7b69d5
--- /dev/null
+++ b/tests/validation/reference/UtilsQuantizedAsymm.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_VALIDATION_UTILS_QUANTIZED_ASYMM_H__
+#define __ARM_COMPUTE_TEST_VALIDATION_UTILS_QUANTIZED_ASYMM_H__
+
+#include <cstdint>
+#include <limits>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/** Rounded-to-nearest division by a power of two; ties are rounded away from zero. */
+inline int32_t asymm_rounding_divide_by_pow2(int32_t x, int exponent)
+{
+    const int32_t mask      = (1 << exponent) - 1;
+    const int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
+    return (x >> exponent) + ((x & mask) > threshold ? 1 : 0);
+}
+
+/** Saturating, rounding, doubling multiplication of two 32-bit integers, returning the high 32 bits of the doubled product. The same as the ARMv7 NEON VQRDMULH instruction. */
+inline int32_t asymm_int_mult(int32_t a, int32_t b)
+{
+    bool    overflow = a == b && a == std::numeric_limits<int32_t>::min();
+    int64_t a_64(a);
+    int64_t b_64(b);
+    int64_t ab_64        = a_64 * b_64;
+    int32_t nudge        = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30));
+    int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (1ll << 31));
+    return overflow ? std::numeric_limits<int32_t>::max() : ab_x2_high32;
+}
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_VALIDATION_UTILS_QUANTIZED_ASYMM_H__ */
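
These two helpers mirror gemmlowp-style fixed-point arithmetic: asymm_rounding_divide_by_pow2
divides by 2^exponent rounding to nearest with ties away from zero, and asymm_int_mult returns the
high 32 bits of the rounded, doubled 64-bit product, saturating only for INT32_MIN * INT32_MIN.
A few worked values, assuming the header above is on the include path:

    #include <cstdio>

    #include "tests/validation/reference/UtilsQuantizedAsymm.h"

    int main()
    {
        using namespace arm_compute::test::validation;

        // 7 / 2^2 = 1.75 rounds to 2; -7 / 2^2 = -1.75 rounds to -2 (away from zero).
        std::printf("%d %d\n", asymm_rounding_divide_by_pow2(7, 2), asymm_rounding_divide_by_pow2(-7, 2));

        // Treating int32 values as Q0.31 fixed point: 0.5 * 0.5 = 0.25, i.e. (1 << 30) with itself gives 1 << 29.
        std::printf("%d\n", asymm_int_mult(1 << 30, 1 << 30)); // 536870912
    }
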
diff --git a/tests/validation/CPP/WarpAffine.cpp b/tests/validation/reference/WarpAffine.cpp
similarity index 100%
rename from tests/validation/CPP/WarpAffine.cpp
rename to tests/validation/reference/WarpAffine.cpp
diff --git a/tests/validation/CPP/WarpAffine.h b/tests/validation/reference/WarpAffine.h
similarity index 100%
rename from tests/validation/CPP/WarpAffine.h
rename to tests/validation/reference/WarpAffine.h
diff --git a/tests/validation/CPP/WarpPerspective.cpp b/tests/validation/reference/WarpPerspective.cpp
similarity index 100%
rename from tests/validation/CPP/WarpPerspective.cpp
rename to tests/validation/reference/WarpPerspective.cpp
diff --git a/tests/validation/CPP/WarpPerspective.h b/tests/validation/reference/WarpPerspective.h
similarity index 100%
rename from tests/validation/CPP/WarpPerspective.h
rename to tests/validation/reference/WarpPerspective.h