arm_compute v18.01

Change-Id: I9bfa178c2e38bfd5fc812e62aab6760d87748e05
diff --git a/tests/AssetsLibrary.h b/tests/AssetsLibrary.h
index 8e32fd5..1fe8c63 100644
--- a/tests/AssetsLibrary.h
+++ b/tests/AssetsLibrary.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -275,6 +275,17 @@
      */
     void fill(RawTensor &raw, const std::string &name, Format format, Channel channel) const;
 
+    /** Fills the specified @p tensor with the content of the raw tensor.
+     *
+     * @param[in, out] tensor To be filled tensor.
+     * @param[in]      raw    Raw tensor used to fill the tensor.
+     *
+     * @warning No check is performed that the shape and element size of
+     *          @p raw actually match those of the tensor.
+     */
+    template <typename T>
+    void fill(T &&tensor, RawTensor raw) const;
+
     /** Fill a tensor with uniform distribution across the range of its type
      *
      * @param[in, out] tensor      To be filled tensor.
@@ -474,6 +485,19 @@
 }
 
 template <typename T>
+void AssetsLibrary::fill(T &&tensor, RawTensor raw) const
+{
+    // Walk the raw buffer one element at a time and copy each element to its coordinate in the tensor
+    const size_t step = raw.element_size();
+    for(size_t pos = 0; pos < raw.size(); pos += step)
+    {
+        const Coordinates coord = index2coord(raw.shape(), pos / step);
+        const auto        dst   = static_cast<RawTensor::value_type *>(tensor(coord));
+        std::copy_n(raw.data() + pos, step, dst);
+    }
+}
+
+template <typename T>
 void AssetsLibrary::fill_tensor_uniform(T &&tensor, std::random_device::result_type seed_offset) const
 {
     switch(tensor.data_type())
diff --git a/tests/CL/Helper.h b/tests/CL/Helper.h
new file mode 100644
index 0000000..3f19d61
--- /dev/null
+++ b/tests/CL/Helper.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_CL_HELPER_H__
+#define __ARM_COMPUTE_TEST_CL_HELPER_H__
+
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+#include "support/ToolchainSupport.h"
+
+namespace arm_compute
+{
+namespace test
+{
+// This template synthesizes an ICLSimpleFunction which runs the given kernel K; configure() forwards its arguments to K::configure()
+template <typename K>
+class CLSynthetizeFunction : public ICLSimpleFunction
+{
+public:
+    template <typename... Args>
+    void configure(Args &&... args)
+    {
+        auto k = arm_compute::support::cpp14::make_unique<K>();
+        k->configure(std::forward<Args>(args)...);
+        _kernel = std::move(k);
+    }
+};
+
+// As above, but also sets up a constant zero border of size bordersize on the first configure() argument (the input tensor)
+template <typename K, int bordersize>
+class CLSynthetizeFunctionWithZeroConstantBorder : public ICLSimpleFunction
+{
+public:
+    template <typename T, typename... Args>
+    void configure(T first, Args &&... args)
+    {
+        auto k = arm_compute::support::cpp14::make_unique<K>();
+        k->configure(first, std::forward<Args>(args)...);
+        _kernel = std::move(k);
+        _border_handler.configure(first, BorderSize(bordersize), BorderMode::CONSTANT, PixelValue(0));
+    }
+};
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_CL_HELPER_H__ */
diff --git a/tests/GLES_COMPUTE/Helper.h b/tests/GLES_COMPUTE/Helper.h
index 5f6460a..c2cd7c0 100644
--- a/tests/GLES_COMPUTE/Helper.h
+++ b/tests/GLES_COMPUTE/Helper.h
@@ -91,23 +91,6 @@
     std::cout << name << ":" << std::endl;
     std::cout << s.str().c_str();
     t.unmap();
-
-    return;
-}
-
-/** Helper to sync tensor, if tensor is not used, GPU have optimized the operation.
- *
- * @param[in] tensor Tensor to be sync.
- *
- * @return Empty @ref GCTensor with the specified shape and data type.
- */
-inline void force_sync_tensor(ITensor &tensor)
-{
-    IGCTensor &t = dynamic_cast<IGCTensor &>(tensor);
-    t.map();
-    t.unmap();
-
-    return;
 }
 } // namespace gles_compute
 } // namespace test
diff --git a/tests/SConscript b/tests/SConscript
index e84e275..b2ca676 100644
--- a/tests/SConscript
+++ b/tests/SConscript
@@ -50,6 +50,9 @@
 
 Help(new_options.GenerateHelpText(test_env))
 
+Import("arm_compute_test_framework")
+test_env.Append(LIBS = arm_compute_test_framework)
+
 if env['os'] in ['android', 'bare_metal'] or env['standalone']:
     Import("arm_compute_a")
     Import("arm_compute_core_a")
@@ -65,9 +68,6 @@
 test_env.Append(LIBPATH = ["#build/%s" % env['build_dir']])
 test_env.Append(LIBPATH = ["#build/%s/opencl-1.2-stubs" % env['build_dir']])
 
-Import("arm_compute_test_framework")
-test_env.Append(LIBS = arm_compute_test_framework)
-
 common_files = Glob('*.cpp')
 common_objects = [test_env.StaticObject(f) for f in common_files]
 
diff --git a/tests/SimpleTensor.h b/tests/SimpleTensor.h
index 6091991..902f5b5 100644
--- a/tests/SimpleTensor.h
+++ b/tests/SimpleTensor.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -191,7 +191,8 @@
       _fixed_point_position(fixed_point_position),
       _quantization_info()
 {
-    _buffer = support::cpp14::make_unique<T[]>(num_elements() * num_channels());
+    _num_channels = num_channels();
+    _buffer       = support::cpp14::make_unique<T[]>(num_elements() * _num_channels);
 }
 
 template <typename T>
@@ -338,13 +339,13 @@
 template <typename T>
 const void *SimpleTensor<T>::operator()(const Coordinates &coord) const
 {
-    return _buffer.get() + coord2index(_shape, coord);
+    return _buffer.get() + coord2index(_shape, coord) * _num_channels;
 }
 
 template <typename T>
 void *SimpleTensor<T>::operator()(const Coordinates &coord)
 {
-    return _buffer.get() + coord2index(_shape, coord);
+    return _buffer.get() + coord2index(_shape, coord) * _num_channels;
 }
 
 template <typename U>
diff --git a/tests/Utils.h b/tests/Utils.h
index df1d7a5..750d907 100644
--- a/tests/Utils.h
+++ b/tests/Utils.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,11 +27,23 @@
 #include "arm_compute/core/Coordinates.h"
 #include "arm_compute/core/Error.h"
 #include "arm_compute/core/FixedPoint.h"
+#include "arm_compute/core/HOGInfo.h"
+#include "arm_compute/core/Size2D.h"
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/TensorShape.h"
 #include "arm_compute/core/Types.h"
 #include "support/ToolchainSupport.h"
 
+#ifdef ARM_COMPUTE_CL
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#endif /* ARM_COMPUTE_CL */
+
+#ifdef ARM_COMPUTE_GC
+#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#endif /* ARM_COMPUTE_GC */
+
 #include <cmath>
 #include <cstddef>
 #include <limits>
@@ -44,6 +56,9 @@
 
 namespace arm_compute
 {
+#ifdef ARM_COMPUTE_CL
+class CLTensor;
+#endif /* ARM_COMPUTE_CL */
 namespace test
 {
 /** Round floating-point value with half value rounding to positive infinity.
@@ -187,17 +202,19 @@
 
 /** Create a valid region based on tensor shape, border mode and border size
  *
- * @param[in] shape            Shape used as size of the valid region.
+ * @param[in] a_shape          Shape used as size of the valid region.
  * @param[in] border_undefined (Optional) Boolean indicating if the border mode is undefined.
  * @param[in] border_size      (Optional) Border size used to specify the region to exclude.
  *
  * @return A valid region starting at (0, 0, ...) with size of @p shape if @p border_undefined is false; otherwise
  *  return A valid region starting at (@p border_size.left, @p border_size.top, ...) with reduced size of @p shape.
  */
-inline ValidRegion shape_to_valid_region(TensorShape shape, bool border_undefined = false, BorderSize border_size = BorderSize(0))
+inline ValidRegion shape_to_valid_region(const TensorShape &a_shape, bool border_undefined = false, BorderSize border_size = BorderSize(0))
 {
-    Coordinates anchor;
-    anchor.set_num_dimensions(shape.num_dimensions());
+    ValidRegion valid_region{ Coordinates(), a_shape };
+
+    Coordinates &anchor = valid_region.anchor;
+    TensorShape &shape  = valid_region.shape;
 
     if(border_undefined)
     {
@@ -213,43 +230,45 @@
         shape.set(1, valid_shape_y);
     }
 
-    return ValidRegion(std::move(anchor), std::move(shape));
+    return valid_region;
 }
 
 /** Create a valid region for Gaussian Pyramid Half based on tensor shape and valid region at level "i - 1" and border mode
  *
  * @note The border size is 2 in case of Gaussian Pyramid Half
  *
- * @param[in] shape            Shape used at level "i - 1" of Gaussian Pyramid Half
- * @param[in] valid_region     Valid region used at level "i - 1" of Gaussian Pyramid Half
+ * @param[in] a_shape          Shape used at level "i - 1" of Gaussian Pyramid Half
+ * @param[in] a_valid_region   Valid region used at level "i - 1" of Gaussian Pyramid Half
  * @param[in] border_undefined (Optional) Boolean indicating if the border mode is undefined.
  *
  *  return The valid region for the level "i" of Gaussian Pyramid Half
  */
-inline ValidRegion shape_to_valid_region_gaussian_pyramid_half(TensorShape shape, ValidRegion valid_region, bool border_undefined = false)
+inline ValidRegion shape_to_valid_region_gaussian_pyramid_half(const TensorShape &a_shape, const ValidRegion &a_valid_region, bool border_undefined = false)
 {
     constexpr int border_size = 2;
-    Coordinates   anchor;
-    anchor.set_num_dimensions(shape.num_dimensions());
+
+    ValidRegion valid_region{ Coordinates(), a_shape };
+    Coordinates &anchor = valid_region.anchor;
+    const TensorShape &shape     = a_shape;            // Level "i - 1" shape: read-only, needed for the parity test and the previous-level borders
+    TensorShape       &dst_shape = valid_region.shape; // Level "i" shape: starts as a copy of a_shape and is updated below
 
     // Compute tensor shape for level "i" of Gaussian Pyramid Half
     // dst_width  = (src_width + 1) * 0.5f
     // dst_height = (src_height + 1) * 0.5f
-    TensorShape dst_shape = shape;
     dst_shape.set(0, (shape[0] + 1) * 0.5f);
     dst_shape.set(1, (shape[1] + 1) * 0.5f);
 
     if(border_undefined)
     {
         ARM_COMPUTE_ERROR_ON(shape.num_dimensions() < 2);
 
         // Compute the left and top invalid borders
-        float invalid_border_left = static_cast<float>(valid_region.anchor.x() + border_size) / 2.0f;
-        float invalid_border_top  = static_cast<float>(valid_region.anchor.y() + border_size) / 2.0f;
+        float invalid_border_left = static_cast<float>(a_valid_region.anchor.x() + border_size) / 2.0f;
+        float invalid_border_top  = static_cast<float>(a_valid_region.anchor.y() + border_size) / 2.0f;
 
         // For the new anchor point we can have 2 cases:
-        // 1) If the width/height of the tensor shape is odd, we have to take the ceil value of (valid_region.anchor.x() + border_size) / 2.0f or (valid_region.anchor.y() + border_size / 2.0f
-        // 2) If the width/height of the tensor shape is even, we have to take the floor value of (valid_region.anchor.x() + border_size) / 2.0f or (valid_region.anchor.y() + border_size) / 2.0f
+        // 1) If the width/height of the level "i - 1" tensor shape is odd, we have to take the ceil value of (a_valid_region.anchor.x() + border_size) / 2.0f or (a_valid_region.anchor.y() + border_size) / 2.0f
+        // 2) If the width/height of the level "i - 1" tensor shape is even, we have to take the floor value of (a_valid_region.anchor.x() + border_size) / 2.0f or (a_valid_region.anchor.y() + border_size) / 2.0f
         // In this manner we should be able to propagate correctly the valid region along all levels of the pyramid
         invalid_border_left = (shape[0] % 2) ? std::ceil(invalid_border_left) : std::floor(invalid_border_left);
         invalid_border_top  = (shape[1] % 2) ? std::ceil(invalid_border_top) : std::floor(invalid_border_top);
@@ -260,21 +279,21 @@
 
         // Compute shape
         // Calculate the right and bottom invalid borders at the previous level of the pyramid
-        const float prev_invalid_border_right  = static_cast<float>(shape[0] - (valid_region.anchor.x() + valid_region.shape[0]));
-        const float prev_invalid_border_bottom = static_cast<float>(shape[1] - (valid_region.anchor.y() + valid_region.shape[1]));
+        const float prev_invalid_border_right  = static_cast<float>(shape[0] - (a_valid_region.anchor.x() + a_valid_region.shape[0]));
+        const float prev_invalid_border_bottom = static_cast<float>(shape[1] - (a_valid_region.anchor.y() + a_valid_region.shape[1]));
 
         // Calculate the right and bottom invalid borders at the current level of the pyramid
         const float invalid_border_right  = std::ceil((prev_invalid_border_right + static_cast<float>(border_size)) / 2.0f);
         const float invalid_border_bottom = std::ceil((prev_invalid_border_bottom + static_cast<float>(border_size)) / 2.0f);
 
         const int valid_shape_x = std::max(0, static_cast<int>(dst_shape.x()) - static_cast<int>(invalid_border_left) - static_cast<int>(invalid_border_right));
         const int valid_shape_y = std::max(0, static_cast<int>(dst_shape.y()) - static_cast<int>(invalid_border_top) - static_cast<int>(invalid_border_bottom));
 
         dst_shape.set(0, valid_shape_x);
         dst_shape.set(1, valid_shape_y);
     }
 
-    return ValidRegion(std::move(anchor), std::move(dst_shape));
+    return valid_region;
 }
 
 /** Write the value after casting the pointer according to @p data_type.
@@ -463,6 +482,48 @@
     return tensor;
 }
 
+/** Build a tensor of type @p T and initialise its allocator from a shape and a format.
+ *
+ * @param[in] shape  Shape of the tensor.
+ * @param[in] format Format of the tensor.
+ *
+ * @return Tensor of type @p T whose allocator has been initialised with the resulting tensor info.
+ */
+template <typename T>
+inline T create_tensor(const TensorShape &shape, Format format)
+{
+    T result;
+    // Describe the tensor first, then hand that description to the allocator
+    TensorInfo tensor_info(shape, format);
+    result.allocator()->init(tensor_info);
+
+    return result;
+}
+
+/** Create and initialize a HOG (Histogram of Oriented Gradients) of the given type.
+ *
+ * @param[in] cell_size             Cell size in pixels
+ * @param[in] block_size            Block size in pixels. Must be a multiple of cell_size.
+ * @param[in] detection_window_size Detection window size in pixels. Must be a multiple of block_size and block_stride.
+ * @param[in] block_stride          Distance in pixels between 2 consecutive blocks along the x and y direction. Must be a multiple of cell size
+ * @param[in] num_bins              Number of histogram bins for each cell
+ * @param[in] normalization_type    (Optional) Normalization type to use for each block
+ * @param[in] l2_hyst_threshold     (Optional) Threshold used for L2HYS_NORM normalization method
+ * @param[in] phase_type            (Optional) Type of @ref PhaseType
+ *
+ * @return Initialized HOG of given type.
+ */
+template <typename T>
+inline T create_HOG(const Size2D &cell_size, const Size2D &block_size, const Size2D &detection_window_size, const Size2D &block_stride, size_t num_bins,
+                    HOGNormType normalization_type = HOGNormType::L2HYS_NORM, float l2_hyst_threshold = 0.2f, PhaseType phase_type = PhaseType::UNSIGNED)
+{
+    T       hog;
+    HOGInfo hog_info(cell_size, block_size, detection_window_size, block_stride, num_bins, normalization_type, l2_hyst_threshold, phase_type);
+    hog.init(hog_info);
+
+    return hog;
+}
+
 /** Create a vector of random ROIs.
  *
  * @param[in] shape     The shape of the input tensor.
@@ -566,6 +627,39 @@
             ARM_COMPUTE_ERROR("NOT SUPPORTED!");
     }
 }
+
+/** Call CLScheduler::get().sync() when built with ARM_COMPUTE_CL, OpenCL is available and TensorType decays to CLTensor; otherwise do nothing.
+ */
+template <typename TensorType>
+inline void sync_if_necessary()
+{
+#ifdef ARM_COMPUTE_CL
+    if(opencl_is_available() && std::is_same<typename std::decay<TensorType>::type, arm_compute::CLTensor>::value)
+    {
+        CLScheduler::get().sync();
+    }
+#endif /* ARM_COMPUTE_CL */
+}
+
+/** Force a sync of @p tensor by mapping and unmapping it when TensorType decays to GCTensor and opengles31_is_available() returns true.
+ *
+ * @note If the destination tensor is not used on OpenGL ES, the GPU will optimize out the operation.
+ *
+ * @param[in] tensor Tensor to be synced.
+ */
+template <typename TensorType>
+inline void sync_tensor_if_necessary(TensorType &tensor)
+{
+#ifdef ARM_COMPUTE_GC
+    if(opengles31_is_available() && std::is_same<typename std::decay<TensorType>::type, arm_compute::GCTensor>::value)
+    {
+        // Force sync the tensor by calling map and unmap.
+        IGCTensor &t = dynamic_cast<IGCTensor &>(tensor);
+        t.map();
+        t.unmap();
+    }
+#endif /* ARM_COMPUTE_GC */
+}
 } // namespace test
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_TEST_UTILS_H__ */
diff --git a/tests/benchmark/CL/ActivationLayer.cpp b/tests/benchmark/CL/ActivationLayer.cpp
index e919c78..2e4641b 100644
--- a/tests/benchmark/CL/ActivationLayer.cpp
+++ b/tests/benchmark/CL/ActivationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -32,6 +32,7 @@
 #include "tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1ActivationLayerDataset.h"
 #include "tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4ActivationLayerDataset.h"
 #include "tests/datasets/system_tests/lenet5/LeNet5ActivationLayerDataset.h"
+#include "tests/datasets/system_tests/mobilenet/MobileNetActivationLayerDataset.h"
 #include "tests/datasets/system_tests/squeezenet/SqueezeNetActivationLayerDataset.h"
 #include "tests/datasets/system_tests/vgg/vgg16/VGG16ActivationLayerDataset.h"
 #include "tests/datasets/system_tests/yolo/v2/YOLOV2ActivationLayerDataset.h"
@@ -45,7 +46,8 @@
 {
 namespace
 {
-const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8, DataType::QS16 });
+const auto data_types           = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8, DataType::QS16 });
+const auto data_types_mobilenet = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8, DataType::QS16, DataType::QASYMM8 });
 } // namespace
 
 using CLActivationLayerFixture = ActivationLayerFixture<CLTensor, CLActivationLayer, CLAccessor>;
@@ -62,6 +64,11 @@
                                                                                         data_types),
                                                             framework::dataset::make("Batches", 1)));
 
+REGISTER_FIXTURE_DATA_TEST_CASE(MobileNetActivationLayer, CLActivationLayerFixture, framework::DatasetMode::ALL,
+                                framework::dataset::combine(framework::dataset::combine(datasets::MobileNetActivationLayerDataset(),
+                                                                                        data_types_mobilenet),
+                                                            framework::dataset::make("Batches", 1)));
+
 REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1ActivationLayer, CLActivationLayerFixture, framework::DatasetMode::ALL,
                                 framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1ActivationLayerDataset(),
                                                                                         data_types),
@@ -98,6 +105,11 @@
                                                                                         data_types),
                                                             framework::dataset::make("Batches", { 4, 8 })));
 
+REGISTER_FIXTURE_DATA_TEST_CASE(MobileNetActivationLayer, CLActivationLayerFixture, framework::DatasetMode::NIGHTLY,
+                                framework::dataset::combine(framework::dataset::combine(datasets::MobileNetActivationLayerDataset(),
+                                                                                        data_types_mobilenet),
+                                                            framework::dataset::make("Batches", { 4, 8 })));
+
 REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1ActivationLayer, CLActivationLayerFixture, framework::DatasetMode::NIGHTLY,
                                 framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1ActivationLayerDataset(),
                                                                                         data_types),
diff --git a/tests/benchmark/CL/BatchNormalizationLayer.cpp b/tests/benchmark/CL/BatchNormalizationLayer.cpp
index af88278..0fc8727 100644
--- a/tests/benchmark/CL/BatchNormalizationLayer.cpp
+++ b/tests/benchmark/CL/BatchNormalizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,7 +40,7 @@
 {
 namespace
 {
-const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QS8, DataType::QS16 });
+const auto data_types = framework::dataset::make("DataType", { DataType::F32 });
 } // namespace
 
 using CLBatchNormalizationLayerFixture = BatchNormalizationLayerFixture<CLTensor, CLBatchNormalizationLayer, CLAccessor>;
diff --git a/tests/benchmark/CL/ConvolutionLayer.cpp b/tests/benchmark/CL/ConvolutionLayer.cpp
index 4467765..e163dad 100644
--- a/tests/benchmark/CL/ConvolutionLayer.cpp
+++ b/tests/benchmark/CL/ConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,7 +45,7 @@
 {
 namespace
 {
-const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8, DataType::QS16 });
+const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 });
 } // namespace
 
 using CLConvolutionLayerFixture = ConvolutionLayerFixture<CLTensor, CLConvolutionLayer, CLAccessor>;
diff --git a/tests/benchmark/CL/DepthConcatenateLayer.cpp b/tests/benchmark/CL/DepthConcatenateLayer.cpp
new file mode 100644
index 0000000..3a5c457
--- /dev/null
+++ b/tests/benchmark/CL/DepthConcatenateLayer.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/benchmark/fixtures/DepthConcatenateLayerFixture.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 });
+} // namespace
+
+using CLDepthConcatenateLayerFixture = DepthConcatenateLayerFixture<CLTensor, ICLTensor, CLDepthConcatenateLayer, CLAccessor>;
+
+TEST_SUITE(CL)
+TEST_SUITE(DepthConcatenateLayer)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConcatenateLayerFixture, framework::DatasetMode::PRECOMMIT, framework::dataset::combine(datasets::Small2DShapes(), data_types));
+REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, framework::dataset::combine(datasets::DepthConcatenateLayerShapes(), data_types));
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/CL/DepthwiseConvolutionLayer.cpp b/tests/benchmark/CL/DepthwiseConvolutionLayer.cpp
index be6fba0..1073cff 100644
--- a/tests/benchmark/CL/DepthwiseConvolutionLayer.cpp
+++ b/tests/benchmark/CL/DepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,7 +28,7 @@
 #include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h"
-#include "tests/datasets/MobileNetDepthwiseConvolutionLayerDataset.h"
+#include "tests/datasets/system_tests/mobilenet/MobileNetDepthwiseConvolutionLayerDataset.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
 #include "utils/TypePrinter.h"
@@ -37,12 +37,12 @@
 {
 namespace test
 {
-const auto data_types                    = framework::dataset::make("DataType", { DataType::F32 });
-using CLDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerFixture<CLTensor, CLDepthwiseConvolutionLayer, CLAccessor>;
+const auto data_types                    = framework::dataset::make("DataType", { DataType::F32, DataType::QASYMM8 });
+using CLDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerFixture<CLTensor, CLDepthwiseConvolutionLayer3x3, CLAccessor>;
 
 TEST_SUITE(CL)
 
-REGISTER_FIXTURE_DATA_TEST_CASE(MobileNetDepthwiseConvolutionLayer, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL,
+REGISTER_FIXTURE_DATA_TEST_CASE(MobileNetDepthwiseConvLayer, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL,
                                 framework::dataset::combine(framework::dataset::combine(datasets::MobileNetDepthwiseConvolutionLayerDataset(), data_types),
                                                             framework::dataset::make("Batches", { 1 })));
 
diff --git a/tests/benchmark/CL/DepthwiseSeparableConvolutionLayer.cpp b/tests/benchmark/CL/DepthwiseSeparableConvolutionLayer.cpp
index 7f5bb00..82c506f 100644
--- a/tests/benchmark/CL/DepthwiseSeparableConvolutionLayer.cpp
+++ b/tests/benchmark/CL/DepthwiseSeparableConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,7 +28,7 @@
 #include "arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/benchmark/fixtures/DepthwiseSeparableConvolutionLayerFixture.h"
-#include "tests/datasets/MobileNetDepthwiseSeparableConvolutionLayerDataset.h"
+#include "tests/datasets/system_tests/mobilenet/MobileNetDepthwiseSeparableConvolutionLayerDataset.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
 #include "utils/TypePrinter.h"
diff --git a/tests/benchmark/CL/DequantizationLayer.cpp b/tests/benchmark/CL/DequantizationLayer.cpp
new file mode 100644
index 0000000..d34034e
--- /dev/null
+++ b/tests/benchmark/CL/DequantizationLayer.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLDequantizationLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/benchmark/fixtures/DequantizationLayerFixture.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto data_types_src = framework::dataset::make("DataType", { DataType::U8 });
+const auto data_types_dst = framework::dataset::make("DataType", { DataType::F32 });
+} // namespace
+
+using CLDequantizationLayerFixture = DequantizationLayerFixture<CLTensor, CLDequantizationLayer, CLAccessor>;
+
+TEST_SUITE(CL)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(DequantizationLayer, CLDequantizationLayerFixture,
+                                framework::DatasetMode::ALL,
+                                framework::dataset::combine(framework::dataset::combine(datasets::Small3DShapes(), data_types_src), data_types_dst));
+
+TEST_SUITE_END()
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/CL/FlattenLayer.cpp b/tests/benchmark/CL/FlattenLayer.cpp
new file mode 100644
index 0000000..e7e5a47
--- /dev/null
+++ b/tests/benchmark/CL/FlattenLayer.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/benchmark/fixtures/FlattenLayerFixture.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto data_types = framework::dataset::make("DataType", { DataType::F32 }); // file-local dataset with a single entry: F32
+} // namespace
+
+using CLFlattenLayerFixture = FlattenLayerFixture<CLTensor, CLFlattenLayer, CLAccessor>; // generic fixture bound to the CL tensor, function and accessor types
+
+TEST_SUITE(CL)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(Flatten, CLFlattenLayerFixture, framework::DatasetMode::ALL,
+                                framework::dataset::combine(datasets::Small4DShapes(), data_types)); // Small4DShapes combined with data_types
+
+TEST_SUITE_END() // CL
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/CL/FullyConnectedLayer.cpp b/tests/benchmark/CL/FullyConnectedLayer.cpp
index e5f3217..4821885 100644
--- a/tests/benchmark/CL/FullyConnectedLayer.cpp
+++ b/tests/benchmark/CL/FullyConnectedLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,7 +43,7 @@
 {
 namespace
 {
-const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8, DataType::QS16 });
+const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 }); // only F16 and F32 are benchmarked; QS8/QS16 are intentionally absent
 } // namespace
 
 using CLFullyConnectedLayerFixture = FullyConnectedLayerFixture<CLTensor, CLFullyConnectedLayer, CLAccessor>;
diff --git a/tests/benchmark/CL/GEMM.cpp b/tests/benchmark/CL/GEMM.cpp
index e0c54f7..615b492 100644
--- a/tests/benchmark/CL/GEMM.cpp
+++ b/tests/benchmark/CL/GEMM.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,6 +27,7 @@
 #include "arm_compute/runtime/CL/functions/CLGEMM.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/benchmark/fixtures/GEMMFixture.h"
+#include "tests/datasets/AlexNetGEMMDataset.h"
 #include "tests/datasets/GoogleNetGEMMDataset.h"
 #include "tests/datasets/MatrixMultiplyGEMMDataset.h"
 #include "tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1GEMMDataset.h"
@@ -50,6 +51,7 @@
 REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1GEMM, CLGEMMFixture, framework::DatasetMode::ALL, framework::dataset::combine(datasets::GoogLeNetInceptionV1GEMMDataset(), data_types));
 REGISTER_FIXTURE_DATA_TEST_CASE(MatrixMultiplyGEMM, CLGEMMFixture, framework::DatasetMode::ALL, framework::dataset::combine(datasets::MatrixMultiplyGEMMDataset(), data_types));
 REGISTER_FIXTURE_DATA_TEST_CASE(GoogleNetGEMM, CLGEMMFixture, framework::DatasetMode::NIGHTLY, framework::dataset::combine(datasets::GoogleNetGEMMDataset(), data_types));
+REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetGEMM, CLGEMMFixture, framework::DatasetMode::NIGHTLY, framework::dataset::combine(datasets::AlexNetGEMMDataset(), data_types)); // registered under DatasetMode::NIGHTLY, like GoogleNetGEMM above
 
 TEST_SUITE_END()
 } // namespace test
diff --git a/tests/benchmark/CL/GEMMInterleave4x4.cpp b/tests/benchmark/CL/GEMMInterleave4x4.cpp
new file mode 100644
index 0000000..eb7f6d6
--- /dev/null
+++ b/tests/benchmark/CL/GEMMInterleave4x4.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMInterleave4x4.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/benchmark/fixtures/GEMMInterleave4x4Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto data_shapes = framework::dataset::make("M", 8, 12) * framework::dataset::make("N", 8, 12); // "M" and "N" datasets joined with operator*; NOTE(review): make(name, 8, 12) presumably yields a range -- confirm the bounds semantics
+const auto data_types  = framework::dataset::make("DataType", { DataType::U8, DataType::U16, DataType::U32 }); // unsigned integer types only
+} // namespace
+
+using CLGEMMInterleave4x4Fixture = GEMMInterleave4x4Fixture<CLTensor, CLGEMMInterleave4x4, CLAccessor>; // generic fixture bound to the CL tensor, function and accessor types
+
+TEST_SUITE(CL)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(GEMMInterleave4x4, CLGEMMInterleave4x4Fixture, framework::DatasetMode::ALL, data_shapes *data_types); // binary operator* on two datasets; the spacing only looks like a pointer declarator
+
+TEST_SUITE_END() // CL
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/CL/GEMMLowp.cpp b/tests/benchmark/CL/GEMMLowp.cpp
index 039695b..1138339 100644
--- a/tests/benchmark/CL/GEMMLowp.cpp
+++ b/tests/benchmark/CL/GEMMLowp.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,6 +28,7 @@
 #include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/benchmark/fixtures/GEMMLowpFixture.h"
+#include "tests/datasets/AlexNetGEMMDataset.h"
 #include "tests/datasets/GoogleNetGEMMDataset.h"
 #include "tests/datasets/MatrixMultiplyGEMMDataset.h"
 #include "tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1GEMMDataset.h"
@@ -46,6 +47,7 @@
 REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1GEMMLowp, CLGEMMLowpFixture, framework::DatasetMode::ALL, datasets::GoogLeNetInceptionV1GEMMDataset());
 REGISTER_FIXTURE_DATA_TEST_CASE(MatrixMultiplyGEMMLowp, CLGEMMLowpFixture, framework::DatasetMode::ALL, datasets::MatrixMultiplyGEMMDataset());
 REGISTER_FIXTURE_DATA_TEST_CASE(GoogleNetGEMMLowp, CLGEMMLowpFixture, framework::DatasetMode::NIGHTLY, datasets::GoogleNetGEMMDataset());
+REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetGEMMLowp, CLGEMMLowpFixture, framework::DatasetMode::NIGHTLY, datasets::AlexNetGEMMDataset()); // registered under DatasetMode::NIGHTLY, like GoogleNetGEMMLowp above
 
 TEST_SUITE_END()
 } // namespace test
diff --git a/tests/benchmark/CL/HarrisCorners.cpp b/tests/benchmark/CL/HarrisCorners.cpp
new file mode 100644
index 0000000..990ac24
--- /dev/null
+++ b/tests/benchmark/CL/HarrisCorners.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLArray.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLHarrisCorners.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/benchmark/fixtures/HarrisCornersFixture.h"
+#include "tests/datasets/ImageFileDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto threshold     = framework::dataset::make("Threshold", { 0.00115f }); // single-value dataset
+const auto min_dist      = framework::dataset::make("MinDist", { 2.f }); // single-value dataset
+const auto sensitivity   = framework::dataset::make("Sensitivity", { 0.04f }); // single-value dataset
+const auto gradient_size = framework::dataset::make("GradientSize", { 3, 5, 7 });
+const auto block_size    = framework::dataset::make("BlockSize", { 3, 5, 7 });
+const auto border_mode   = framework::dataset::make("BorderMode", { BorderMode::UNDEFINED, BorderMode::CONSTANT, BorderMode::REPLICATE });
+} // namespace
+
+using CLHarrisCornersFixture = HarrisCornersFixture<CLTensor, CLHarrisCorners, CLAccessor, CLKeyPointArray>; // generic fixture bound to the CL tensor, function, accessor and key-point array types
+
+TEST_SUITE(CL)
+TEST_SUITE(HarrisCorners)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(RunSmall, CLHarrisCornersFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(combine(datasets::SmallImageFiles(),
+                                                                                                                     framework::dataset::make("Format", { Format::U8 })),
+                                                                                                                     threshold),
+                                                                                                                     min_dist),
+                                                                                                                     sensitivity),
+                                                                                                                     gradient_size),
+                                                                                                                     block_size),
+                                                                                                                     border_mode),
+                                                                                                             framework::dataset::make("UseFP16", { false }))); // "UseFP16" dataset holds only false
+
+REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, CLHarrisCornersFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine(datasets::LargeImageFiles(),
+                                                                                                                   framework::dataset::make("Format", { Format::U8 })),
+                                                                                                                   threshold),
+                                                                                                                   min_dist),
+                                                                                                                   sensitivity),
+                                                                                                                   gradient_size),
+                                                                                                                   block_size),
+                                                                                                                   border_mode),
+                                                                                                           framework::dataset::make("UseFP16", { false }))); // "UseFP16" dataset holds only false
+
+TEST_SUITE_END() // HarrisCorners
+TEST_SUITE_END() // CL
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/CL/L2NormalizeLayer.cpp b/tests/benchmark/CL/L2NormalizeLayer.cpp
new file mode 100644
index 0000000..c88792c
--- /dev/null
+++ b/tests/benchmark/CL/L2NormalizeLayer.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/benchmark/fixtures/L2NormalizeLayerFixture.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto data_types = framework::dataset::make("DataType", { DataType::F32 }); // file-local dataset with a single entry: F32
+} // namespace
+
+using CLL2NormalizeLayerFixture = L2NormalizeLayerFixture<CLTensor, CLL2NormalizeLayer, CLAccessor>; // generic fixture bound to the CL tensor, function and accessor types
+
+TEST_SUITE(CL)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(L2NormalizeLayer, CLL2NormalizeLayerFixture, framework::DatasetMode::ALL,
+                                framework::dataset::combine(framework::dataset::combine(datasets::SmallShapes(), data_types), framework::dataset::make("Axis", { 0 }))); // SmallShapes; "Axis" dataset holds only 0
+TEST_SUITE(NIGHTLY)
+REGISTER_FIXTURE_DATA_TEST_CASE(L2NormalizeLayer, CLL2NormalizeLayerFixture, framework::DatasetMode::NIGHTLY, // same test-case name as above, but inside the nested NIGHTLY suite
+                                framework::dataset::combine(framework::dataset::combine(datasets::LargeShapes(), data_types), framework::dataset::make("Axis", { 0 }))); // LargeShapes; "Axis" dataset holds only 0
+TEST_SUITE_END() // NIGHTLY
+TEST_SUITE_END() // CL
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/CL/Magnitude.cpp b/tests/benchmark/CL/Magnitude.cpp
new file mode 100644
index 0000000..5e75083
--- /dev/null
+++ b/tests/benchmark/CL/Magnitude.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLMagnitude.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/benchmark/fixtures/MagnitudeFixture.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto magnitude_types = framework::dataset::make("MagnitudeType", { MagnitudeType::L1NORM, MagnitudeType::L2NORM }); // file-local dataset shared by RunSmall and RunLarge
+} // namespace
+
+using CLMagnitudeFixture = MagnitudeFixture<CLTensor, CLMagnitude, CLAccessor>; // generic fixture bound to the CL tensor, function and accessor types
+
+TEST_SUITE(CL)
+TEST_SUITE(Magnitude)
+REGISTER_FIXTURE_DATA_TEST_CASE(RunSmall, CLMagnitudeFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallImageShapes(), framework::dataset::make("Format", { Format::S16, Format::S32 })),
+                                                                                                                 magnitude_types),
+                                                                                                         framework::dataset::make("UseFP16", { false }))); // "UseFP16" dataset holds only false
+REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, CLMagnitudeFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeImageShapes(), framework::dataset::make("Format", { Format::S16, Format::S32 })),
+                                                                                                               magnitude_types),
+                                                                                                       framework::dataset::make("UseFP16", { false }))); // "UseFP16" dataset holds only false
+TEST_SUITE_END() // Magnitude
+TEST_SUITE_END() // CL
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/CL/NormalizationLayer.cpp b/tests/benchmark/CL/NormalizationLayer.cpp
index 8dd5f62..75a98b6 100644
--- a/tests/benchmark/CL/NormalizationLayer.cpp
+++ b/tests/benchmark/CL/NormalizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,7 +40,7 @@
 {
 namespace
 {
-const auto data_types = framework::dataset::make("DataType", { DataType::QS8, DataType::QS16, DataType::F16, DataType::F32 });
+const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 }); // only F16 and F32 are benchmarked; QS8/QS16 are intentionally absent
 } // namespace
 
 using CLNormalizationLayerFixture = NormalizationLayerFixture<CLTensor, CLNormalizationLayer, CLAccessor>;
diff --git a/tests/benchmark/CL/QuantizationLayer.cpp b/tests/benchmark/CL/QuantizationLayer.cpp
new file mode 100644
index 0000000..2dc775a
--- /dev/null
+++ b/tests/benchmark/CL/QuantizationLayer.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLQuantizationLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/benchmark/fixtures/QuantizationLayerFixture.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto data_types = framework::dataset::make("DataType", { DataType::F32 }); // file-local dataset with a single entry: F32
+} // namespace
+
+using CLQuantizationLayerFixture = QuantizationLayerFixture<CLTensor, CLQuantizationLayer, CLAccessor>; // generic fixture bound to the CL tensor, function and accessor types
+
+TEST_SUITE(CL)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(QuantizationLayer, CLQuantizationLayerFixture, framework::DatasetMode::ALL,
+                                framework::dataset::combine(datasets::Small3DShapes(), data_types)); // Small3DShapes combined with data_types
+
+TEST_SUITE_END() // CL
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/CL/ReshapeLayer.cpp b/tests/benchmark/CL/ReshapeLayer.cpp
new file mode 100644
index 0000000..550deed
--- /dev/null
+++ b/tests/benchmark/CL/ReshapeLayer.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/benchmark/fixtures/ReshapeLayerFixture.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto data_types = framework::dataset::make("DataType", { DataType::U8, DataType::U16, DataType::U32, DataType::F16, DataType::F32 }); // file-local dataset: three unsigned integer and two floating-point types
+} // namespace
+
+using CLReshapeLayerFixture = ReshapeLayerFixture<CLTensor, CLReshapeLayer, CLAccessor>; // generic fixture bound to the CL tensor, function and accessor types
+
+TEST_SUITE(CL)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(ReshapeLayer, CLReshapeLayerFixture, framework::DatasetMode::ALL,
+                                framework::dataset::combine(datasets::SmallShapes(), data_types)); // SmallShapes combined with data_types
+
+TEST_SUITE_END() // CL
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/CL/SYSTEM/AlexNet.cpp b/tests/benchmark/CL/SYSTEM/AlexNet.cpp
index 5f25841..1d38c1a 100644
--- a/tests/benchmark/CL/SYSTEM/AlexNet.cpp
+++ b/tests/benchmark/CL/SYSTEM/AlexNet.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -60,7 +60,7 @@
 
 REGISTER_FIXTURE_DATA_TEST_CASE(AlexNet, CLAlexNetFixture, framework::DatasetMode::ALL,
                                 framework::dataset::combine(framework::dataset::make("DataType", { DataType::F16, DataType::F32 }),
-                                                            framework::dataset::make("Batches", { 1, 4, 8 })));
+                                                            framework::dataset::make("Batches", { 1, 2, 4 })));
 
 TEST_SUITE_END()
 TEST_SUITE_END()
diff --git a/tests/benchmark/CL/Scale.cpp b/tests/benchmark/CL/Scale.cpp
new file mode 100644
index 0000000..a1cc0a5
--- /dev/null
+++ b/tests/benchmark/CL/Scale.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLScale.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/benchmark/fixtures/ScaleFixture.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/SamplingPolicyDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto interpolation_types = framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR }); // file-local dataset shared by RunSmall and RunLarge
+} // namespace
+
+using CLScaleFixture = ScaleFixture<CLTensor, CLScale, CLAccessor>; // generic fixture bound to the CL tensor, function and accessor types
+
+TEST_SUITE(CL)
+TEST_SUITE(Scale)
+REGISTER_FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallImageShapes(), framework::dataset::make("DataType", { DataType::F16, DataType::F32 })),
+                                                                                                                     interpolation_types),
+                                                                                                             datasets::BorderModes()),
+                                                                                                     datasets::SamplingPolicies())); // order: shapes, data types, interpolation, border modes, sampling policies
+REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeImageShapes(), framework::dataset::make("DataType", { DataType::F16, DataType::F32 })),
+                                                                                                                   interpolation_types),
+                                                                                                           datasets::BorderModes()),
+                                                                                                   datasets::SamplingPolicies())); // same combination as RunSmall, on LargeImageShapes
+TEST_SUITE_END() // Scale
+TEST_SUITE_END() // CL
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/CL/SoftmaxLayer.cpp b/tests/benchmark/CL/SoftmaxLayer.cpp
index ff7d5fd..0bdc47b 100644
--- a/tests/benchmark/CL/SoftmaxLayer.cpp
+++ b/tests/benchmark/CL/SoftmaxLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -39,7 +39,7 @@
 {
 namespace
 {
-const auto data_types = framework::dataset::make("DataType", { DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32 });
+const auto data_types = framework::dataset::make("DataType", { DataType::QASYMM8, DataType::F16, DataType::F32 }); // QASYMM8, F16 and F32 are benchmarked; QS8/QS16 are intentionally absent
 } // namespace
 
 using CLSoftmaxLayerFixture = SoftmaxLayerFixture<CLTensor, CLSoftmaxLayer, CLAccessor>;
diff --git a/tests/benchmark/CL/Transpose.cpp b/tests/benchmark/CL/Transpose.cpp
new file mode 100644
index 0000000..33ca406
--- /dev/null
+++ b/tests/benchmark/CL/Transpose.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLTranspose.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/benchmark/fixtures/TransposeFixture.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto data_types = framework::dataset::make("DataType", { DataType::U8, DataType::U16, DataType::U32 }); // "DataType" dataset shared by RunSmall and RunLarge below
+} // namespace
+
+using CLTransposeFixture = TransposeFixture<CLTensor, CLTranspose, CLAccessor>; // TransposeFixture instantiated with the CL tensor, function and accessor types
+
+TEST_SUITE(CL)
+TEST_SUITE(Transpose)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(RunSmall, CLTransposeFixture, framework::DatasetMode::PRECOMMIT, // PRECOMMIT case: small 1D and 2D shapes combined with data_types
+                                framework::dataset::combine(framework::dataset::concat(datasets::Small1DShapes(), datasets::Small2DShapes()), data_types));
+
+REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, CLTransposeFixture, framework::DatasetMode::NIGHTLY, // NIGHTLY case: large 1D and 2D shapes combined with data_types
+                                framework::dataset::combine(framework::dataset::concat(datasets::Large1DShapes(), datasets::Large2DShapes()), data_types));
+
+TEST_SUITE_END() // Transpose
+TEST_SUITE_END() // CL
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/GLES_COMPUTE/ConvolutionLayer.cpp b/tests/benchmark/GLES_COMPUTE/ConvolutionLayer.cpp
new file mode 100644
index 0000000..0d8edb7
--- /dev/null
+++ b/tests/benchmark/GLES_COMPUTE/ConvolutionLayer.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/benchmark/fixtures/ConvolutionLayerFixture.h"
+#include "tests/datasets/system_tests/alexnet/AlexNetConvolutionLayerDataset.h"
+#include "tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1ConvolutionLayerDataset.h"
+#include "tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4ConvolutionLayerDataset.h"
+#include "tests/datasets/system_tests/lenet5/LeNet5ConvolutionLayerDataset.h"
+#include "tests/datasets/system_tests/squeezenet/SqueezeNetConvolutionLayerDataset.h"
+#include "tests/datasets/system_tests/vgg/vgg16/VGG16ConvolutionLayerDataset.h"
+#include "tests/datasets/system_tests/yolo/v2/YOLOV2ConvolutionLayerDataset.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace
+{
+const auto data_types = framework::dataset::make("DataType", { DataType::F16 }); // every test case in this file is combined with F16 only
+} // namespace
+
+using GCConvolutionLayerFixture = ConvolutionLayerFixture<GCTensor, GCConvolutionLayer, GCAccessor>; // ConvolutionLayerFixture instantiated with the GC tensor, function and accessor types
+
+TEST_SUITE(GC) // DatasetMode::ALL cases with "Batches" = 1 come first; larger batch counts live in the nested NIGHTLY suite
+
+REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::ALL,
+                                framework::dataset::combine(framework::dataset::combine(datasets::AlexNetConvolutionLayerDataset(),
+                                                                                        data_types),
+                                                            framework::dataset::make("Batches", 1)));
+
+REGISTER_FIXTURE_DATA_TEST_CASE(LeNet5ConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::ALL,
+                                framework::dataset::combine(framework::dataset::combine(datasets::LeNet5ConvolutionLayerDataset(),
+                                                                                        data_types),
+                                                            framework::dataset::make("Batches", 1)));
+
+REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1ConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::ALL,
+                                framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1ConvolutionLayerDataset(),
+                                                                                        data_types),
+                                                            framework::dataset::make("Batches", 1)));
+
+REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4ConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::ALL,
+                                framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4ConvolutionLayerDataset(),
+                                                                                        data_types),
+                                                            framework::dataset::make("Batches", 1)));
+
+REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::ALL,
+                                framework::dataset::combine(framework::dataset::combine(datasets::SqueezeNetConvolutionLayerDataset(),
+                                                                                        data_types),
+                                                            framework::dataset::make("Batches", 1)));
+
+TEST_SUITE(NIGHTLY) // repeats the networks above with "Batches" = { 4, 8 }; VGG16 and YOLOV2 are registered only in this suite
+REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
+                                framework::dataset::combine(framework::dataset::combine(datasets::AlexNetConvolutionLayerDataset(),
+                                                                                        data_types),
+                                                            framework::dataset::make("Batches", { 4, 8 })));
+
+REGISTER_FIXTURE_DATA_TEST_CASE(LeNet5ConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
+                                framework::dataset::combine(framework::dataset::combine(datasets::LeNet5ConvolutionLayerDataset(),
+                                                                                        data_types),
+                                                            framework::dataset::make("Batches", { 4, 8 })));
+
+REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1ConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
+                                framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1ConvolutionLayerDataset(),
+                                                                                        data_types),
+                                                            framework::dataset::make("Batches", { 4, 8 })));
+
+REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4ConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
+                                framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4ConvolutionLayerDataset(),
+                                                                                        data_types),
+                                                            framework::dataset::make("Batches", { 4, 8 })));
+
+REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
+                                framework::dataset::combine(framework::dataset::combine(datasets::SqueezeNetConvolutionLayerDataset(),
+                                                                                        data_types),
+                                                            framework::dataset::make("Batches", { 4, 8 })));
+
+// 8 batches use about 1.8GB of memory which is too much for most devices!
+REGISTER_FIXTURE_DATA_TEST_CASE(VGG16ConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
+                                framework::dataset::combine(framework::dataset::combine(datasets::VGG16ConvolutionLayerDataset(),
+                                                                                        data_types),
+                                                            framework::dataset::make("Batches", { 1, 4 }))); // batch count capped at 4 because of the memory note above
+
+REGISTER_FIXTURE_DATA_TEST_CASE(YOLOV2ConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
+                                framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2ConvolutionLayerDataset(),
+                                                                                        data_types),
+                                                            framework::dataset::make("Batches", { 1, 4, 8 })));
+TEST_SUITE_END() // NIGHTLY
+TEST_SUITE_END() // GC
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp b/tests/benchmark/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp
new file mode 100644
index 0000000..9a0cbe3
--- /dev/null
+++ b/tests/benchmark/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h"
+#include "tests/datasets/system_tests/mobilenet/MobileNetDepthwiseConvolutionLayerDataset.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+const auto data_types                    = framework::dataset::make("DataType", { DataType::F16 }); // F16 only. NOTE(review): declared directly in namespace test rather than in an anonymous namespace — confirm this is intended
+using GCDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerFixture<GCTensor, GCDepthwiseConvolutionLayer3x3, GCAccessor>; // fixture bound to GCDepthwiseConvolutionLayer3x3
+
+TEST_SUITE(GC)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(MobileNetDepthwiseConvLayer, GCDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, // MobileNet depthwise dataset combined with data_types and "Batches" = { 1 }
+                                framework::dataset::combine(framework::dataset::combine(datasets::MobileNetDepthwiseConvolutionLayerDataset(), data_types),
+                                                            framework::dataset::make("Batches", { 1 })));
+
+TEST_SUITE_END() // GC
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/GLES_COMPUTE/NormalizePlanarYUVLayer.cpp b/tests/benchmark/GLES_COMPUTE/NormalizePlanarYUVLayer.cpp
new file mode 100644
index 0000000..3324180
--- /dev/null
+++ b/tests/benchmark/GLES_COMPUTE/NormalizePlanarYUVLayer.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizePlanarYUVLayer.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/benchmark/fixtures/NormalizePlanarYUVLayerFixture.h"
+#include "tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4NormalizePlanarYUVLayerDataset.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace
+{
+const auto data_types = framework::dataset::make("DataType", { DataType::F16 }); // the single test case below is combined with F16 only
+} // namespace
+
+using GCNormalizePlanarYUVLayerFixture = NormalizePlanarYUVLayerFixture<GCTensor, GCNormalizePlanarYUVLayer, GCAccessor>; // NormalizePlanarYUVLayerFixture instantiated with the GC tensor, function and accessor types
+
+TEST_SUITE(GC)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4NormalizePlanarYUVLayer, GCNormalizePlanarYUVLayerFixture, framework::DatasetMode::ALL, // GoogLeNet Inception V4 dataset combined with data_types and "Batches" = 1
+                                framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4NormalizePlanarYUVLayerDataset(),
+                                                                                        data_types),
+                                                            framework::dataset::make("Batches", 1)));
+
+TEST_SUITE_END() // GC
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/GLES_COMPUTE/Scale.cpp b/tests/benchmark/GLES_COMPUTE/Scale.cpp
new file mode 100644
index 0000000..12f3151
--- /dev/null
+++ b/tests/benchmark/GLES_COMPUTE/Scale.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCScale.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/benchmark/fixtures/ScaleLayerFixture.h"
+#include "tests/datasets/ScaleLayerDataset.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace
+{
+const auto data_types = framework::dataset::make("DataType", { DataType::F16 }); // F16 only; both test cases below instantiate the fixture with half
+} // namespace
+
+template <typename T>
+using GCScaleLayerFixture = ScaleLayerFixture<GCTensor, GCScale, GCAccessor, T>; // ScaleLayerFixture with the GC types fixed and the element type T left open
+
+TEST_SUITE(GC)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(SmallScaleLayer, GCScaleLayerFixture<half>, framework::DatasetMode::ALL, // small scale-layer shapes combined with data_types
+                                framework::dataset::combine(datasets::SmallScaleLayerShapes(), data_types));
+
+TEST_SUITE(NIGHTLY)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(LargeScaleLayer, GCScaleLayerFixture<half>, framework::DatasetMode::NIGHTLY, // large scale-layer shapes combined with data_types
+                                framework::dataset::combine(datasets::LargeScaleLayerShapes(), data_types));
+
+TEST_SUITE_END() // NIGHTLY
+TEST_SUITE_END() // GC
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/GLES_COMPUTE/Transpose.cpp b/tests/benchmark/GLES_COMPUTE/Transpose.cpp
new file mode 100644
index 0000000..a76ca8f
--- /dev/null
+++ b/tests/benchmark/GLES_COMPUTE/Transpose.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCTranspose.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/benchmark/fixtures/TransposeFixture.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 }); // "DataType" dataset shared by RunSmall and RunLarge below
+} // namespace
+
+using GCTransposeFixture = TransposeFixture<GCTensor, GCTranspose, GCAccessor>; // TransposeFixture instantiated with the GC tensor, function and accessor types
+
+TEST_SUITE(GC)
+TEST_SUITE(Transpose)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(RunSmall, GCTransposeFixture, framework::DatasetMode::PRECOMMIT, // PRECOMMIT case: small 2D shapes only, combined with data_types
+                                framework::dataset::combine(datasets::Small2DShapes(), data_types));
+
+REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, GCTransposeFixture, framework::DatasetMode::NIGHTLY, // NIGHTLY case: large 2D shapes only, combined with data_types
+                                framework::dataset::combine(datasets::Large2DShapes(), data_types));
+
+TEST_SUITE_END() // Transpose
+TEST_SUITE_END() // GC
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/NEON/ActivationLayer.cpp b/tests/benchmark/NEON/ActivationLayer.cpp
index 6344da8..58aa42a 100644
--- a/tests/benchmark/NEON/ActivationLayer.cpp
+++ b/tests/benchmark/NEON/ActivationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -32,6 +32,7 @@
 #include "tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1ActivationLayerDataset.h"
 #include "tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4ActivationLayerDataset.h"
 #include "tests/datasets/system_tests/lenet5/LeNet5ActivationLayerDataset.h"
+#include "tests/datasets/system_tests/mobilenet/MobileNetActivationLayerDataset.h"
 #include "tests/datasets/system_tests/squeezenet/SqueezeNetActivationLayerDataset.h"
 #include "tests/datasets/system_tests/vgg/vgg16/VGG16ActivationLayerDataset.h"
 #include "tests/datasets/system_tests/yolo/v2/YOLOV2ActivationLayerDataset.h"
@@ -46,9 +47,11 @@
 namespace
 {
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8, DataType::QS16 });
+const auto data_types           = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8, DataType::QS16 });
+const auto data_types_mobilenet = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8, DataType::QS16, DataType::QASYMM8 }); // data_types plus QASYMM8; passed to the MobileNet test cases
 #else  /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QS8, DataType::QS16 });
+const auto data_types           = framework::dataset::make("DataType", { DataType::F32, DataType::QS8, DataType::QS16 });
+const auto data_types_mobilenet = framework::dataset::make("DataType", { DataType::F32, DataType::QS8, DataType::QS16, DataType::QASYMM8 }); // same as above without F16, for builds where the macro is not defined
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 } // namespace
 
@@ -64,6 +67,10 @@
                                 framework::dataset::combine(framework::dataset::combine(datasets::LeNet5ActivationLayerDataset(), data_types),
                                                             framework::dataset::make("Batches", 1)));
 
+REGISTER_FIXTURE_DATA_TEST_CASE(MobileNetActivationLayer, NEActivationLayerFixture, framework::DatasetMode::ALL,
+                                framework::dataset::combine(framework::dataset::combine(datasets::MobileNetActivationLayerDataset(), data_types_mobilenet),
+                                                            framework::dataset::make("Batches", 1))); // MobileNet dataset combined with data_types_mobilenet (includes QASYMM8), batch count 1
+
 REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1ActivationLayer, NEActivationLayerFixture, framework::DatasetMode::ALL,
                                 framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1ActivationLayerDataset(), data_types),
                                                             framework::dataset::make("Batches", 1)));
@@ -93,6 +100,10 @@
                                 framework::dataset::combine(framework::dataset::combine(datasets::LeNet5ActivationLayerDataset(), data_types),
                                                             framework::dataset::make("Batches", { 4, 8 })));
 
+REGISTER_FIXTURE_DATA_TEST_CASE(MobileNetActivationLayer, NEActivationLayerFixture, framework::DatasetMode::NIGHTLY,
+                                framework::dataset::combine(framework::dataset::combine(datasets::MobileNetActivationLayerDataset(), data_types_mobilenet),
+                                                            framework::dataset::make("Batches", { 4, 8 }))); // NIGHTLY variant of the MobileNet case with batch counts 4 and 8
+
 REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1ActivationLayer, NEActivationLayerFixture, framework::DatasetMode::NIGHTLY,
                                 framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1ActivationLayerDataset(), data_types),
                                                             framework::dataset::make("Batches", { 4, 8 })));
diff --git a/tests/benchmark/NEON/BatchNormalizationLayer.cpp b/tests/benchmark/NEON/BatchNormalizationLayer.cpp
index 5b568de..3a7f2c6 100644
--- a/tests/benchmark/NEON/BatchNormalizationLayer.cpp
+++ b/tests/benchmark/NEON/BatchNormalizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,9 +42,9 @@
 namespace
 {
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8 });
+const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 }); // F16 is listed only when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined
 #else  /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QS8 });
+const auto data_types = framework::dataset::make("DataType", { DataType::F32 }); // F32 only when the macro is not defined
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 } // namespace
 
diff --git a/tests/benchmark/NEON/ConvolutionLayer.cpp b/tests/benchmark/NEON/ConvolutionLayer.cpp
index 45138d7..b2aa929 100644
--- a/tests/benchmark/NEON/ConvolutionLayer.cpp
+++ b/tests/benchmark/NEON/ConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,9 +47,9 @@
 namespace
 {
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8, DataType::QS16 });
+const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 }); // F16 is listed only when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined
 #else  /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QS8, DataType::QS16 });
+const auto data_types = framework::dataset::make("DataType", { DataType::F32 }); // F32 only when the macro is not defined
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 } // namespace
 
diff --git a/tests/benchmark/NEON/DepthConcatenateLayer.cpp b/tests/benchmark/NEON/DepthConcatenateLayer.cpp
new file mode 100644
index 0000000..04c0bff
--- /dev/null
+++ b/tests/benchmark/NEON/DepthConcatenateLayer.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/benchmark/fixtures/DepthConcatenateLayerFixture.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 }); // NOTE(review): F16 is listed without an __ARM_FEATURE_FP16_VECTOR_ARITHMETIC guard — confirm this is intended
+} // namespace
+
+using NEDepthConcatenateLayerFixture = DepthConcatenateLayerFixture<Tensor, ITensor, NEDepthConcatenateLayer, Accessor>; // DepthConcatenateLayerFixture instantiated with Tensor, ITensor, the NEON function and Accessor
+
+TEST_SUITE(NE)
+TEST_SUITE(DepthConcatenateLayer)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture, framework::DatasetMode::PRECOMMIT, framework::dataset::combine(datasets::Small2DShapes(), data_types)); // PRECOMMIT case: small 2D shapes combined with data_types
+REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, framework::dataset::combine(datasets::DepthConcatenateLayerShapes(), data_types)); // NIGHTLY case: DepthConcatenateLayerShapes combined with data_types
+
+TEST_SUITE_END() // DepthConcatenateLayer
+TEST_SUITE_END() // NE
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/NEON/DequantizationLayer.cpp b/tests/benchmark/NEON/DequantizationLayer.cpp
new file mode 100644
index 0000000..9a0a1e7
--- /dev/null
+++ b/tests/benchmark/NEON/DequantizationLayer.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/benchmark/fixtures/DequantizationLayerFixture.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto data_types_src = framework::dataset::make("DataType", { DataType::U8 }); // first data-type dataset combined with the shapes below
+const auto data_types_dst = framework::dataset::make("DataType", { DataType::F32 }); // NOTE(review): labelled "DataType" like data_types_src - confirm the fixture tells the two apart by position
+} // namespace
+
+using NEDequantizationLayerFixture = DequantizationLayerFixture<Tensor, NEDequantizationLayer, Accessor>; // generic fixture instantiated with the NEON tensor, function and accessor types
+
+TEST_SUITE(NEON)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(DequantizationLayer, NEDequantizationLayerFixture,
+                                framework::DatasetMode::ALL,
+                                framework::dataset::combine(framework::dataset::combine(datasets::Small3DShapes(), data_types_src), data_types_dst)); // shapes combined with data_types_src, then with data_types_dst
+
+TEST_SUITE_END() // NEON
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/NEON/DirectConvolutionLayer.cpp b/tests/benchmark/NEON/DirectConvolutionLayer.cpp
index 4b93eea..7854f61 100644
--- a/tests/benchmark/NEON/DirectConvolutionLayer.cpp
+++ b/tests/benchmark/NEON/DirectConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,11 +47,9 @@
 {
 // Special data types for networks that need 5x5 direct convolution, which does not support Fixed Point
 #ifdef ARM_COMPUTE_ENABLE_F16
-const auto data_types          = framework::dataset::make("DataType", { DataType::QS8, DataType::F16, DataType::F32 });
-const auto data_types_no_fixed = framework::dataset::make("DataType", { DataType::F16, DataType::F32 });
+const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 });
 #else  /* ARM_COMPUTE_ENABLE_F16 */
-const auto data_types          = framework::dataset::make("DataType", { DataType::QS8, DataType::F32 });
-const auto data_types_no_fixed = framework::dataset::make("DataType", { DataType::F32 });
+const auto data_types = framework::dataset::make("DataType", { DataType::F32 });
 #endif /* ARM_COMPUTE_ENABLE_F16 */
 } // namespace
 
@@ -60,11 +58,11 @@
 TEST_SUITE(NEON)
 
 REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetDirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL,
-                                framework::dataset::combine(framework::dataset::combine(datasets::AlexNetDirectConvolutionLayerDataset(), data_types_no_fixed),
+                                framework::dataset::combine(framework::dataset::combine(datasets::AlexNetDirectConvolutionLayerDataset(), data_types),
                                                             framework::dataset::make("Batches", 1)));
 
 REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL,
-                                framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1DirectConvolutionLayerDataset(), data_types_no_fixed),
+                                framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1DirectConvolutionLayerDataset(), data_types),
                                                             framework::dataset::make("Batches", 1)));
 
 REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL,
@@ -77,11 +75,11 @@
 
 TEST_SUITE(NIGHTLY)
 REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetDirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
-                                framework::dataset::combine(framework::dataset::combine(datasets::AlexNetDirectConvolutionLayerDataset(), data_types_no_fixed),
+                                framework::dataset::combine(framework::dataset::combine(datasets::AlexNetDirectConvolutionLayerDataset(), data_types),
                                                             framework::dataset::make("Batches", { 4, 8 })));
 
 REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
-                                framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1DirectConvolutionLayerDataset(), data_types_no_fixed),
+                                framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1DirectConvolutionLayerDataset(), data_types),
                                                             framework::dataset::make("Batches", { 4, 8 })));
 
 REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
diff --git a/tests/benchmark/NEON/FlattenLayer.cpp b/tests/benchmark/NEON/FlattenLayer.cpp
new file mode 100644
index 0000000..9479f01
--- /dev/null
+++ b/tests/benchmark/NEON/FlattenLayer.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/benchmark/fixtures/FlattenLayerFixture.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto data_types = framework::dataset::make("DataType", { DataType::F32 }); // only F32 is benchmarked here
+} // namespace
+
+using NEFlattenLayerFixture = FlattenLayerFixture<Tensor, NEFlattenLayer, Accessor>; // generic fixture instantiated with the NEON tensor, function and accessor types
+
+TEST_SUITE(NEON)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(FlattenLayer, NEFlattenLayerFixture, framework::DatasetMode::ALL,
+                                framework::dataset::combine(datasets::Small4DShapes(), data_types)); // small 4D shapes combined with data_types
+
+TEST_SUITE_END() // NEON
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/NEON/FullyConnectedLayer.cpp b/tests/benchmark/NEON/FullyConnectedLayer.cpp
index c914d26..edd4406 100644
--- a/tests/benchmark/NEON/FullyConnectedLayer.cpp
+++ b/tests/benchmark/NEON/FullyConnectedLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,9 +44,9 @@
 namespace
 {
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8, DataType::QS16 });
+const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 });
 #else  /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QS8, DataType::QS16 });
+const auto data_types = framework::dataset::make("DataType", { DataType::F32 });
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 } // namespace
 
diff --git a/tests/benchmark/NEON/GEMM.cpp b/tests/benchmark/NEON/GEMM.cpp
index 9c4cd59..1d6ea8d 100644
--- a/tests/benchmark/NEON/GEMM.cpp
+++ b/tests/benchmark/NEON/GEMM.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,8 +46,7 @@
 #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
     DataType::F16,
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-    DataType::F32,
-    DataType::QS8
+    DataType::F32
 });
 } // namespace
 
diff --git a/tests/benchmark/NEON/GEMMInterleave4x4.cpp b/tests/benchmark/NEON/GEMMInterleave4x4.cpp
new file mode 100644
index 0000000..fa9ce0b
--- /dev/null
+++ b/tests/benchmark/NEON/GEMMInterleave4x4.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/benchmark/fixtures/GEMMInterleave4x4Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto data_shapes = framework::dataset::make("M", 8, 12) * framework::dataset::make("N", 8, 12); // "M" and "N" datasets joined with operator*; NOTE(review): make(name, 8, 12) looks like a numeric range - confirm whether 12 is included
+const auto data_types  = framework::dataset::make("DataType", { DataType::U8, DataType::U16, DataType::U32 }); // unsigned 8/16/32-bit element types
+} // namespace
+
+using NEGEMMInterleave4x4Fixture = GEMMInterleave4x4Fixture<Tensor, NEGEMMInterleave4x4, Accessor>; // generic fixture instantiated with the NEON tensor, function and accessor types
+
+TEST_SUITE(NEON)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(GEMMInterleave4x4, NEGEMMInterleave4x4Fixture, framework::DatasetMode::ALL, data_shapes *data_types); // NOTE(review): operator* presumably matches framework::dataset::combine() - confirm
+
+TEST_SUITE_END() // NEON
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/NEON/HarrisCorners.cpp b/tests/benchmark/NEON/HarrisCorners.cpp
new file mode 100644
index 0000000..33315ff
--- /dev/null
+++ b/tests/benchmark/NEON/HarrisCorners.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEHarrisCorners.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/benchmark/fixtures/HarrisCornersFixture.h"
+#include "tests/datasets/ImageFileDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto threshold     = framework::dataset::make("Threshold", { 0.00115f }); // single fixed value
+const auto min_dist      = framework::dataset::make("MinDist", { 2.f }); // single fixed value
+const auto sensitivity   = framework::dataset::make("Sensitivity", { 0.04f }); // single fixed value
+const auto gradient_size = framework::dataset::make("GradientSize", { 3, 5, 7 }); // three values benchmarked
+const auto block_size    = framework::dataset::make("BlockSize", { 3, 5, 7 }); // three values benchmarked
+const auto border_mode   = framework::dataset::make("BorderMode", { BorderMode::UNDEFINED, BorderMode::CONSTANT, BorderMode::REPLICATE }); // three border modes benchmarked
+} // namespace
+
+using NEHarrisCornersFixture = HarrisCornersFixture<Tensor, NEHarrisCorners, Accessor, KeyPointArray>; // generic fixture instantiated with the NEON tensor, function, accessor and key-point array types
+
+TEST_SUITE(NEON)
+TEST_SUITE(HarrisCorners)
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC // the FP16 suite is compiled only when this macro is defined
+TEST_SUITE(FP16)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(RunSmall, NEHarrisCornersFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(combine(datasets::SmallImageFiles(), // eight nested combine() calls chain nine datasets
+                                                                                                                     framework::dataset::make("Format", { Format::U8 })),
+                                                                                                                     threshold),
+                                                                                                                     min_dist),
+                                                                                                                     sensitivity),
+                                                                                                                     gradient_size),
+                                                                                                                     block_size),
+                                                                                                                     border_mode),
+                                                                                                             framework::dataset::make("UseFP16", { true }))); // FP16 suite passes UseFP16 = true
+REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, NEHarrisCornersFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine(datasets::LargeImageFiles(), // same chain as RunSmall, with the large image files
+                                                                                                                   framework::dataset::make("Format", { Format::U8 })),
+                                                                                                                   threshold),
+                                                                                                                   min_dist),
+                                                                                                                   sensitivity),
+                                                                                                                   gradient_size),
+                                                                                                                   block_size),
+                                                                                                                   border_mode),
+                                                                                                           framework::dataset::make("UseFP16", { true }))); // FP16 suite passes UseFP16 = true
+TEST_SUITE_END() // FP16
+#endif           // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+TEST_SUITE(S16)
+REGISTER_FIXTURE_DATA_TEST_CASE(RunSmall, NEHarrisCornersFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(combine(datasets::SmallImageFiles(), // same dataset chain as the FP16 suite
+                                                                                                                     framework::dataset::make("Format", { Format::U8 })),
+                                                                                                                     threshold),
+                                                                                                                     min_dist),
+                                                                                                                     sensitivity),
+                                                                                                                     gradient_size),
+                                                                                                                     block_size),
+                                                                                                                     border_mode),
+                                                                                                             framework::dataset::make("UseFP16", { false }))); // S16 suite passes UseFP16 = false
+REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, NEHarrisCornersFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine(datasets::LargeImageFiles(), // same chain as RunSmall, with the large image files
+                                                                                                                   framework::dataset::make("Format", { Format::U8 })),
+                                                                                                                   threshold),
+                                                                                                                   min_dist),
+                                                                                                                   sensitivity),
+                                                                                                                   gradient_size),
+                                                                                                                   block_size),
+                                                                                                                   border_mode),
+                                                                                                           framework::dataset::make("UseFP16", { false }))); // S16 suite passes UseFP16 = false
+TEST_SUITE_END() // S16
+TEST_SUITE_END() // HarrisCorners
+TEST_SUITE_END() // NEON
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
\ No newline at end of file
diff --git a/tests/benchmark/NEON/L2NormalizeLayer.cpp b/tests/benchmark/NEON/L2NormalizeLayer.cpp
new file mode 100644
index 0000000..14a9784
--- /dev/null
+++ b/tests/benchmark/NEON/L2NormalizeLayer.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/benchmark/fixtures/L2NormalizeLayerFixture.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto data_types = framework::dataset::make("DataType", { DataType::F32 }); // only F32 is benchmarked here
+} // namespace
+
+using NEL2NormalizeLayerFixture = L2NormalizeLayerFixture<Tensor, NEL2NormalizeLayer, Accessor>; // generic fixture instantiated with the NEON tensor, function and accessor types
+
+TEST_SUITE(NEON)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(L2NormalizeLayer, NEL2NormalizeLayerFixture, framework::DatasetMode::ALL,
+                                framework::dataset::combine(framework::dataset::combine(datasets::SmallShapes(), data_types), framework::dataset::make("Axis", { 0 }))); // small shapes, axis 0 only
+TEST_SUITE(NIGHTLY) // nested suite: the same test name is registered again for the large shapes
+REGISTER_FIXTURE_DATA_TEST_CASE(L2NormalizeLayer, NEL2NormalizeLayerFixture, framework::DatasetMode::NIGHTLY,
+                                framework::dataset::combine(framework::dataset::combine(datasets::LargeShapes(), data_types), framework::dataset::make("Axis", { 0 }))); // large shapes, axis 0 only
+TEST_SUITE_END() // NIGHTLY
+TEST_SUITE_END() // NEON
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/NEON/Magnitude.cpp b/tests/benchmark/NEON/Magnitude.cpp
new file mode 100644
index 0000000..e2b1210
--- /dev/null
+++ b/tests/benchmark/NEON/Magnitude.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEMagnitude.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/benchmark/fixtures/MagnitudeFixture.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto magnitude_types = framework::dataset::make("MagnitudeType", { MagnitudeType::L1NORM, MagnitudeType::L2NORM }); // both magnitude types are benchmarked
+} // namespace
+
+using NEMagnitudeFixture = MagnitudeFixture<Tensor, NEMagnitude, Accessor>; // generic fixture instantiated with the NEON tensor, function and accessor types
+
+TEST_SUITE(NEON)
+TEST_SUITE(Magnitude)
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC // the FP16 suite is compiled only when this macro is defined
+TEST_SUITE(FP16)
+REGISTER_FIXTURE_DATA_TEST_CASE(RunSmall, NEMagnitudeFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallImageShapes(), framework::dataset::make("Format", { Format::S16 })),
+                                                                                                                 magnitude_types),
+                                                                                                         framework::dataset::make("UseFP16", { true }))); // FP16 suite passes UseFP16 = true
+REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, NEMagnitudeFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeImageShapes(), framework::dataset::make("Format", { Format::S16 })),
+                                                                                                               magnitude_types),
+                                                                                                       framework::dataset::make("UseFP16", { true }))); // FP16 suite passes UseFP16 = true
+TEST_SUITE_END() // FP16
+#endif           // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+TEST_SUITE(S16)
+REGISTER_FIXTURE_DATA_TEST_CASE(RunSmall, NEMagnitudeFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallImageShapes(), framework::dataset::make("Format", { Format::S16 })),
+                                                                                                                 magnitude_types),
+                                                                                                         framework::dataset::make("UseFP16", { false }))); // S16 suite passes UseFP16 = false
+REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, NEMagnitudeFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeImageShapes(), framework::dataset::make("Format", { Format::S16 })),
+                                                                                                               magnitude_types),
+                                                                                                       framework::dataset::make("UseFP16", { false }))); // S16 suite passes UseFP16 = false
+TEST_SUITE_END() // S16
+TEST_SUITE_END() // Magnitude
+TEST_SUITE_END() // NEON
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/NEON/NormalizationLayer.cpp b/tests/benchmark/NEON/NormalizationLayer.cpp
index 3b1720d..1f2b2a3 100644
--- a/tests/benchmark/NEON/NormalizationLayer.cpp
+++ b/tests/benchmark/NEON/NormalizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -41,9 +41,9 @@
 namespace
 {
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-const auto data_types = framework::dataset::make("DataType", { DataType::QS8, DataType::QS16, DataType::F16, DataType::F32 });
+const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 });
 #else  /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-const auto data_types = framework::dataset::make("DataType", { DataType::QS8, DataType::QS16, DataType::F32 });
+const auto data_types = framework::dataset::make("DataType", { DataType::F32 });
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 } // namespace
 using NENormalizationLayerFixture = NormalizationLayerFixture<Tensor, NENormalizationLayer, Accessor>;
diff --git a/tests/benchmark/NEON/PoolingLayer.cpp b/tests/benchmark/NEON/PoolingLayer.cpp
index 4815959..c1e1a01 100644
--- a/tests/benchmark/NEON/PoolingLayer.cpp
+++ b/tests/benchmark/NEON/PoolingLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,9 +46,9 @@
 namespace
 {
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8 });
+const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QASYMM8 });
 #else  /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QS8 });
+const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QASYMM8 });
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 } // namespace
 
diff --git a/tests/benchmark/NEON/QuantizationLayer.cpp b/tests/benchmark/NEON/QuantizationLayer.cpp
new file mode 100644
index 0000000..8db6728
--- /dev/null
+++ b/tests/benchmark/NEON/QuantizationLayer.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/benchmark/fixtures/QuantizationLayerFixture.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto data_types = framework::dataset::make("DataType", { DataType::F32 }); // Data types the benchmark is run with (F32 only)
+} // namespace
+
+using NEQuantizationLayerFixture = QuantizationLayerFixture<Tensor, NEQuantizationLayer, Accessor>; // Generic fixture bound to Tensor / NEQuantizationLayer / Accessor
+
+TEST_SUITE(NEON)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(QuantizationLayer, NEQuantizationLayerFixture, framework::DatasetMode::ALL,
+                                framework::dataset::combine(datasets::Small3DShapes(), data_types)); // Small 3D shapes combined with data_types, registered for DatasetMode::ALL
+
+TEST_SUITE_END() // NEON
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/NEON/ReshapeLayer.cpp b/tests/benchmark/NEON/ReshapeLayer.cpp
new file mode 100644
index 0000000..46985e0
--- /dev/null
+++ b/tests/benchmark/NEON/ReshapeLayer.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/benchmark/fixtures/ReshapeLayerFixture.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto data_types = framework::dataset::make("DataType", { DataType::U8, DataType::U16, DataType::U32, DataType::F16, DataType::F32 }); // Data types the benchmark is run with
+} // namespace
+
+using NEReshapeLayerFixture = ReshapeLayerFixture<Tensor, NEReshapeLayer, Accessor>; // Generic fixture bound to Tensor / NEReshapeLayer / Accessor
+
+TEST_SUITE(NEON)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(Reshape, NEReshapeLayerFixture, framework::DatasetMode::ALL,
+                                framework::dataset::combine(datasets::SmallShapes(), data_types)); // Small shapes combined with data_types, registered for DatasetMode::ALL
+
+TEST_SUITE_END() // NEON
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/NEON/SYSTEM/AlexNet.cpp b/tests/benchmark/NEON/SYSTEM/AlexNet.cpp
index ad16d47..2da6180 100644
--- a/tests/benchmark/NEON/SYSTEM/AlexNet.cpp
+++ b/tests/benchmark/NEON/SYSTEM/AlexNet.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,9 +46,9 @@
 namespace
 {
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-const auto alex_net_data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8 });
+const auto alex_net_data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 });
 #else  /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-const auto alex_net_data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QS8 });
+const auto alex_net_data_types = framework::dataset::make("DataType", { DataType::F32 });
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 } // namespace
 
@@ -69,7 +69,7 @@
 
 REGISTER_FIXTURE_DATA_TEST_CASE(AlexNet, NEAlexNetFixture, framework::DatasetMode::ALL,
                                 framework::dataset::combine(alex_net_data_types,
-                                                            framework::dataset::make("Batches", { 1, 4, 8 })));
+                                                            framework::dataset::make("Batches", { 1, 2, 4 })));
 
 TEST_SUITE_END()
 TEST_SUITE_END()
diff --git a/tests/benchmark/NEON/Scale.cpp b/tests/benchmark/NEON/Scale.cpp
new file mode 100644
index 0000000..9b2f0bc
--- /dev/null
+++ b/tests/benchmark/NEON/Scale.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEScale.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/benchmark/fixtures/ScaleFixture.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto interpolation_types = framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR }); // Interpolation policies the benchmark is run with
+} // namespace
+
+using NEScaleFixture = ScaleFixture<Tensor, NEScale, Accessor>; // Generic fixture bound to Tensor / NEScale / Accessor
+
+TEST_SUITE(NEON)
+TEST_SUITE(Scale)
+REGISTER_FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallImageShapes(), framework::dataset::make("DataType", { DataType::U8, DataType::S16, DataType::F32 })),
+                                                                                                                     interpolation_types),
+                                                                                                             datasets::BorderModes()),
+                                                                                                     framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER }))); // PRECOMMIT: small image shapes x data types x interpolation x border modes x sampling policy
+REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeImageShapes(), framework::dataset::make("DataType", { DataType::U8, DataType::S16, DataType::F32 })),
+                                                                                                                   interpolation_types),
+                                                                                                           datasets::BorderModes()),
+                                                                                                   framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER }))); // NIGHTLY: same combination, using the large image shapes
+TEST_SUITE_END() // Scale
+TEST_SUITE_END() // NEON
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/NEON/SoftmaxLayer.cpp b/tests/benchmark/NEON/SoftmaxLayer.cpp
index 399d581..9a6f6a4 100644
--- a/tests/benchmark/NEON/SoftmaxLayer.cpp
+++ b/tests/benchmark/NEON/SoftmaxLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -39,7 +39,7 @@
 {
 namespace
 {
-const auto data_types = framework::dataset::make("DataType", { DataType::QS8, DataType::QS16, DataType::F32 });
+const auto data_types = framework::dataset::make("DataType", { DataType::F32 });
 } // namespace
 
 using NESoftmaxLayerFixture = SoftmaxLayerFixture<Tensor, NESoftmaxLayer, Accessor>;
diff --git a/tests/benchmark/NEON/Transpose.cpp b/tests/benchmark/NEON/Transpose.cpp
new file mode 100644
index 0000000..d60a1ba
--- /dev/null
+++ b/tests/benchmark/NEON/Transpose.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NETranspose.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/benchmark/fixtures/TransposeFixture.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+namespace
+{
+const auto data_types = framework::dataset::make("DataType", { DataType::U8, DataType::U16, DataType::U32 }); // Data types the benchmark is run with
+} // namespace
+
+using NETransposeFixture = TransposeFixture<Tensor, NETranspose, Accessor>; // Generic fixture bound to Tensor / NETranspose / Accessor
+
+TEST_SUITE(NEON)
+TEST_SUITE(Transpose)
+
+REGISTER_FIXTURE_DATA_TEST_CASE(RunSmall, NETransposeFixture, framework::DatasetMode::PRECOMMIT,
+                                framework::dataset::combine(framework::dataset::concat(datasets::Small1DShapes(), datasets::Small2DShapes()), data_types)); // PRECOMMIT: small 1D and 2D shapes combined with data_types
+
+REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, NETransposeFixture, framework::DatasetMode::NIGHTLY,
+                                framework::dataset::combine(framework::dataset::concat(datasets::Large1DShapes(), datasets::Large2DShapes()), data_types)); // NIGHTLY: large 1D and 2D shapes combined with data_types
+
+TEST_SUITE_END() // Transpose
+TEST_SUITE_END() // NEON
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/fixtures/ActivationLayerFixture.h b/tests/benchmark/fixtures/ActivationLayerFixture.h
index 9ded063..8558527 100644
--- a/tests/benchmark/fixtures/ActivationLayerFixture.h
+++ b/tests/benchmark/fixtures/ActivationLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,9 +56,6 @@
         // Allocate tensors
         src.allocator()->allocate();
         dst.allocator()->allocate();
-
-        // Fill tensors
-        library->fill_tensor_uniform(Accessor(src), 0);
     }
 
     void run()
@@ -66,6 +63,12 @@
         act_layer.run();
     }
 
+    void sync()
+    {
+        sync_if_necessary<TensorType>();
+        sync_tensor_if_necessary<TensorType>(dst);
+    }
+
     void teardown()
     {
         src.allocator()->free();
diff --git a/tests/benchmark/fixtures/AlexNetFixture.h b/tests/benchmark/fixtures/AlexNetFixture.h
index 961f4e8..e15aa62 100644
--- a/tests/benchmark/fixtures/AlexNetFixture.h
+++ b/tests/benchmark/fixtures/AlexNetFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -51,13 +51,12 @@
     template <typename...>
     void setup(DataType data_type, int batches)
     {
-        constexpr bool weights_reshaped     = true;
+        constexpr bool weights_reshaped     = false;
         constexpr int  fixed_point_position = 4;
 
         network.init(data_type, fixed_point_position, batches, weights_reshaped);
         network.build();
         network.allocate();
-        network.fill_random();
     }
 
     void run()
@@ -65,6 +64,11 @@
         network.run();
     }
 
+    void sync()
+    {
+        network.sync();
+    }
+
     void teardown()
     {
         network.clear();
diff --git a/tests/benchmark/fixtures/BatchNormalizationLayerFixture.h b/tests/benchmark/fixtures/BatchNormalizationLayerFixture.h
index 55411a4..38a3263 100644
--- a/tests/benchmark/fixtures/BatchNormalizationLayerFixture.h
+++ b/tests/benchmark/fixtures/BatchNormalizationLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,12 +29,6 @@
 #include "tests/Globals.h"
 #include "tests/Utils.h"
 #include "tests/framework/Fixture.h"
-#ifdef ARM_COMPUTE_GC
-#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
-#include "tests/GLES_COMPUTE/Helper.h"
-
-using namespace arm_compute::test::gles_compute;
-#endif /* ARM_COMPUTE_GC */
 
 namespace arm_compute
 {
@@ -70,24 +64,17 @@
         variance.allocator()->allocate();
         beta.allocator()->allocate();
         gamma.allocator()->allocate();
-
-        // Fill tensors
-        library->fill_tensor_uniform(Accessor(src), 0);
-        library->fill_tensor_uniform(Accessor(mean), 1);
-        library->fill_tensor_uniform(Accessor(variance), 2);
-        library->fill_tensor_uniform(Accessor(beta), 3);
-        library->fill_tensor_uniform(Accessor(gamma), 4);
     }
 
     void run()
     {
         batch_norm_layer.run();
-#ifdef ARM_COMPUTE_GC
-        if(opengles31_is_available() && std::is_same<typename std::decay<TensorType>::type, arm_compute::GCTensor>::value)
-        {
-            force_sync_tensor(dst);
-        }
-#endif /* ARM_COMPUTE_GC */
+    }
+
+    void sync()
+    {
+        sync_if_necessary<TensorType>();
+        sync_tensor_if_necessary<TensorType>(dst);
     }
 
     void teardown()
diff --git a/tests/benchmark/fixtures/ConvolutionLayerFixture.h b/tests/benchmark/fixtures/ConvolutionLayerFixture.h
index b526cc3..0d2c3fd 100644
--- a/tests/benchmark/fixtures/ConvolutionLayerFixture.h
+++ b/tests/benchmark/fixtures/ConvolutionLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,13 +30,6 @@
 #include "tests/Utils.h"
 #include "tests/framework/Fixture.h"
 
-#ifdef ARM_COMPUTE_GC
-#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
-#include "tests/GLES_COMPUTE/Helper.h"
-
-using namespace arm_compute::test::gles_compute;
-#endif /* ARM_COMPUTE_GC */
-
 namespace arm_compute
 {
 namespace test
@@ -68,22 +61,17 @@
         weights.allocator()->allocate();
         biases.allocator()->allocate();
         dst.allocator()->allocate();
-
-        // Fill tensors
-        library->fill_tensor_uniform(Accessor(src), 0);
-        library->fill_tensor_uniform(Accessor(weights), 1);
-        library->fill_tensor_uniform(Accessor(biases), 2);
     }
 
     void run()
     {
         conv_layer.run();
-#ifdef ARM_COMPUTE_GC
-        if(opengles31_is_available() && std::is_same<typename std::decay<TensorType>::type, arm_compute::GCTensor>::value)
-        {
-            force_sync_tensor(dst);
-        }
-#endif /* ARM_COMPUTE_GC */
+    }
+
+    void sync()
+    {
+        sync_if_necessary<TensorType>();
+        sync_tensor_if_necessary<TensorType>(dst);
     }
 
     void teardown()
diff --git a/tests/benchmark/fixtures/DepthConcatenateLayerFixture.h b/tests/benchmark/fixtures/DepthConcatenateLayerFixture.h
new file mode 100644
index 0000000..bd4b404
--- /dev/null
+++ b/tests/benchmark/fixtures/DepthConcatenateLayerFixture.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_DEPTHCONCATENATELAYERFIXTURE
+#define ARM_COMPUTE_TEST_DEPTHCONCATENATELAYERFIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Utils.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/Utils.h"
+#include "tests/framework/Fixture.h"
+
+#include <random>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+/** Benchmark fixture for depth concatenation: derives several source tensors from one base shape and concatenates them into a single destination. Can be used for NE/CL/GC */
+template <typename TensorType, typename ITensorType, typename Function, typename AccessorType>
+class DepthConcatenateLayerFixture : public framework::Fixture
+{
+public:
+    inline std::vector<TensorShape> generate_input_shapes(TensorShape shape)
+    {
+        // Pick the number of input shapes (2 to 6) with a generator seeded from the assets library
+        std::mt19937                    gen(library->seed());
+        std::uniform_int_distribution<> num_dis(2, 6);
+        const int                       num_tensors = num_dis(gen);
+
+        std::vector<TensorShape>         shapes(num_tensors, shape);
+        std::uniform_int_distribution<>  depth_dis(1, 7);
+        std::bernoulli_distribution      mutate_dis(0.25f);
+        std::uniform_real_distribution<> change_dis(-0.25f, 0.f);
+
+        // Every entry starts as a copy of the input shape and is then varied below
+        for(auto &s : shapes)
+        {
+            // Set the depth (dimension 2) of the tensor to a random value in [1, 7]
+            s.set(2, depth_dis(gen));
+
+            // Randomly (with probability 0.25) change the first dimension
+            if(mutate_dis(gen))
+            {
+                // Decrease the dimension by a small percentage. Don't increase
+                // as that could make tensor too large. Also the change must be
+                // an even number. Otherwise our depth concatenate fails.
+                s.set(0, s[0] + 2 * static_cast<int>(s[0] * change_dis(gen)));
+            }
+
+            // Repeat the same as above for the second dimension
+            if(mutate_dis(gen))
+            {
+                s.set(1, s[1] + 2 * static_cast<int>(s[1] * change_dis(gen)));
+            }
+        }
+
+        return shapes;
+    }
+
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type)
+    {
+        // Generate input shapes
+        std::vector<TensorShape> src_shapes = generate_input_shapes(shape);
+
+        // Create tensors; capacity is reserved first so the element addresses collected below stay valid while emplacing
+        _srcs.reserve(src_shapes.size());
+
+        std::vector<ITensorType *> src_ptrs;
+
+        for(const auto &shape : src_shapes) // NOTE(review): 'shape' here shadows the setup() parameter of the same name
+        {
+            _srcs.emplace_back(create_tensor<TensorType>(shape, data_type, 1, _fractional_bits));
+            src_ptrs.emplace_back(&_srcs.back());
+        }
+
+        TensorShape dst_shape = calculate_depth_concatenate_shape(src_ptrs);
+        _dst                  = create_tensor<TensorType>(dst_shape, data_type, 1, _fractional_bits);
+
+        _depth_concat.configure(src_ptrs, &_dst);
+
+        for(auto &src : _srcs)
+        {
+            src.allocator()->allocate();
+        }
+
+        _dst.allocator()->allocate();
+    }
+
+    void run()
+    {
+        _depth_concat.run();
+    }
+
+    void sync()
+    {
+        sync_if_necessary<TensorType>();
+        sync_tensor_if_necessary<TensorType>(_dst);
+    }
+
+    void teardown()
+    {
+        for(auto &src : _srcs)
+        {
+            src.allocator()->free();
+        }
+
+        _srcs.clear();
+
+        _dst.allocator()->free();
+    }
+
+private:
+    std::vector<TensorType> _srcs{};               // Source tensors, one per generated input shape
+    TensorType              _dst{};                // Destination tensor of the concatenation
+    Function                _depth_concat{};       // Function under benchmark
+    int                     _fractional_bits{ 1 }; // Forwarded to create_tensor as its last argument
+};
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_DEPTHCONCATENATELAYERFIXTURE */
diff --git a/tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h
index 9a49d56..8283b4d 100644
--- a/tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h
+++ b/tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,7 +40,7 @@
 {
 public:
     template <typename...>
-    void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape dst_shape, PadStrideInfo info, DataType data_type, int batches)
+    void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape dst_shape, PadStrideInfo info, DataType data_type, int batches)
     {
         // Set batched in source and destination shapes
         const unsigned int fixed_point_position = 4;
@@ -50,7 +50,7 @@
         // Create tensors
         src     = create_tensor<TensorType>(src_shape, data_type, 1, fixed_point_position);
         weights = create_tensor<TensorType>(weights_shape, data_type, 1, fixed_point_position);
-        biases  = create_tensor<TensorType>(biases_shape, data_type, 1, fixed_point_position);
+        biases  = create_tensor<TensorType>(TensorShape(weights_shape[2]), is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type, 1, fixed_point_position);
         dst     = create_tensor<TensorType>(dst_shape, data_type, 1, fixed_point_position);
 
         // Create and configure function
@@ -61,10 +61,6 @@
         weights.allocator()->allocate();
         biases.allocator()->allocate();
         dst.allocator()->allocate();
-
-        // Fill tensors
-        library->fill_tensor_uniform(Accessor(src), 0);
-        library->fill_tensor_uniform(Accessor(weights), 1);
     }
 
     void run()
@@ -72,6 +68,12 @@
         depth_conv.run();
     }
 
+    void sync()
+    {
+        sync_if_necessary<TensorType>();
+        sync_tensor_if_necessary<TensorType>(dst);
+    }
+
     void teardown()
     {
         src.allocator()->free();
diff --git a/tests/benchmark/fixtures/DepthwiseSeparableConvolutionLayerFixture.h b/tests/benchmark/fixtures/DepthwiseSeparableConvolutionLayerFixture.h
index fa15440..f78c3e4 100644
--- a/tests/benchmark/fixtures/DepthwiseSeparableConvolutionLayerFixture.h
+++ b/tests/benchmark/fixtures/DepthwiseSeparableConvolutionLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -69,13 +69,6 @@
         pointwise_weights.allocator()->allocate();
         pointwise_biases.allocator()->allocate();
         dst.allocator()->allocate();
-
-        // Fill tensors
-        library->fill_tensor_uniform(Accessor(src), 0);
-        library->fill_tensor_uniform(Accessor(depthwise_weights), 1);
-        library->fill_tensor_uniform(Accessor(depthwise_biases), 2);
-        library->fill_tensor_uniform(Accessor(pointwise_weights), 3);
-        library->fill_tensor_uniform(Accessor(pointwise_biases), 4);
     }
 
     void run()
@@ -83,6 +76,12 @@
         depth_sep_conv_layer.run();
     }
 
+    void sync()
+    {
+        sync_if_necessary<TensorType>();
+        sync_tensor_if_necessary<TensorType>(dst);
+    }
+
     void teardown()
     {
         src.allocator()->free();
diff --git a/tests/benchmark/fixtures/DequantizationLayerFixture.h b/tests/benchmark/fixtures/DequantizationLayerFixture.h
new file mode 100644
index 0000000..5ea8b2d
--- /dev/null
+++ b/tests/benchmark/fixtures/DequantizationLayerFixture.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_DEQUANTIZATION_LAYER_FIXTURE
+#define ARM_COMPUTE_TEST_DEQUANTIZATION_LAYER_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/Globals.h"
+#include "tests/Utils.h"
+#include "tests/framework/Fixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+/** Benchmark fixture for a dequantization function configured with a source, a destination and a min/max tensor. Can be used for NEON and CL */
+template <typename TensorType, typename Function, typename Accessor>
+class DequantizationLayerFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type_src, DataType data_type_dst)
+    {
+        TensorShape shape_min_max = shape;
+        shape_min_max.set(Window::DimX, 2); // X extent becomes 2 - presumably one min and one max value; TODO confirm against the function's contract
+
+        // Remove Y and Z dimensions and keep the batches
+        shape_min_max.remove_dimension(1);
+        shape_min_max.remove_dimension(1);
+
+        // Create tensors (min_max is created with the destination data type)
+        src     = create_tensor<TensorType>(shape, data_type_src);
+        dst     = create_tensor<TensorType>(shape, data_type_dst);
+        min_max = create_tensor<TensorType>(shape_min_max, data_type_dst);
+
+        // Create and configure function
+        dequantization_func.configure(&src, &dst, &min_max);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+        min_max.allocator()->allocate();
+    }
+
+    void run()
+    {
+        dequantization_func.run();
+    }
+
+    void sync()
+    {
+        sync_if_necessary<TensorType>();
+        sync_tensor_if_necessary<TensorType>(dst);
+    }
+
+    void teardown()
+    {
+        src.allocator()->free();
+        dst.allocator()->free();
+        min_max.allocator()->free();
+    }
+
+private:
+    TensorType src{};                 // Source tensor
+    TensorType dst{};                 // Destination tensor
+    TensorType min_max{};             // Min/max tensor passed as third argument to configure()
+    Function   dequantization_func{}; // Function under benchmark
+};
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_DEQUANTIZATION_LAYER_FIXTURE */
diff --git a/tests/benchmark/fixtures/FlattenLayerFixture.h b/tests/benchmark/fixtures/FlattenLayerFixture.h
new file mode 100644
index 0000000..749fa0d
--- /dev/null
+++ b/tests/benchmark/fixtures/FlattenLayerFixture.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_FLATTEN_LAYER_FIXTURE
+#define ARM_COMPUTE_TEST_FLATTEN_LAYER_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/Globals.h"
+#include "tests/Utils.h"
+#include "tests/framework/Fixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+/** Benchmark fixture for a flatten function; the destination shape is the source shape after collapse(3) */
+template <typename TensorType, typename Function, typename Accessor>
+class FlattenLayerFixture : public framework::Fixture
+{
+public:
+    /** Describe source and destination, configure the function, then allocate both tensors */
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type)
+    {
+        const unsigned int fixed_point_position = is_data_type_fixed_point(data_type) ? 4 : 0;
+        TensorShape        flat_shape           = shape;
+        flat_shape.collapse(3);
+        // Both tensors use one channel and the same data type and fixed point position
+        input     = create_tensor<TensorType>(shape, data_type, 1, fixed_point_position);
+        flattened = create_tensor<TensorType>(flat_shape, data_type, 1, fixed_point_position);
+        // Configure first, allocate afterwards
+        flatten.configure(&input, &flattened);
+        input.allocator()->allocate();
+        flattened.allocator()->allocate();
+    }
+
+    /** Execute the configured function */
+    void run()
+    {
+        flatten.run();
+    }
+
+    /** Run the synchronisation helpers; the destination tensor is handed to sync_tensor_if_necessary() */
+    void sync()
+    {
+        sync_if_necessary<TensorType>();
+        sync_tensor_if_necessary<TensorType>(flattened);
+    }
+
+    /** Free the memory of both tensors */
+    void teardown()
+    {
+        input.allocator()->free();
+        flattened.allocator()->free();
+    }
+
+private:
+    TensorType input{};
+    TensorType flattened{};
+    Function   flatten{};
+};
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_FLATTEN_LAYER_FIXTURE */
diff --git a/tests/benchmark/fixtures/FloorFixture.h b/tests/benchmark/fixtures/FloorFixture.h
index 8de87b8..df3a7da 100644
--- a/tests/benchmark/fixtures/FloorFixture.h
+++ b/tests/benchmark/fixtures/FloorFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -52,9 +52,6 @@
         // Allocate tensors
         src.allocator()->allocate();
         dst.allocator()->allocate();
-
-        // Fill tensors
-        library->fill_tensor_uniform(Accessor(src), 0);
     }
 
     void run()
@@ -62,6 +59,12 @@
         floor_func.run();
     }
 
+    void sync()
+    {
+        sync_if_necessary<TensorType>();
+        sync_tensor_if_necessary<TensorType>(dst);
+    }
+
     void teardown()
     {
         src.allocator()->free();
diff --git a/tests/benchmark/fixtures/FullyConnectedLayerFixture.h b/tests/benchmark/fixtures/FullyConnectedLayerFixture.h
index ef08c4a..b84fece 100644
--- a/tests/benchmark/fixtures/FullyConnectedLayerFixture.h
+++ b/tests/benchmark/fixtures/FullyConnectedLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,13 +30,6 @@
 #include "tests/Utils.h"
 #include "tests/framework/Fixture.h"
 
-#ifdef ARM_COMPUTE_GC
-#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
-#include "tests/GLES_COMPUTE/Helper.h"
-
-using namespace arm_compute::test::gles_compute;
-#endif /* ARM_COMPUTE_GC */
-
 namespace arm_compute
 {
 namespace test
@@ -68,22 +61,17 @@
         weights.allocator()->allocate();
         biases.allocator()->allocate();
         dst.allocator()->allocate();
-
-        // Fill tensors
-        library->fill_tensor_uniform(Accessor(src), 0);
-        library->fill_tensor_uniform(Accessor(weights), 1);
-        library->fill_tensor_uniform(Accessor(biases), 2);
     }
 
     void run()
     {
         fc_layer.run();
-#ifdef ARM_COMPUTE_GC
-        if(opengles31_is_available() && std::is_same<typename std::decay<TensorType>::type, arm_compute::GCTensor>::value)
-        {
-            force_sync_tensor(dst);
-        }
-#endif /* ARM_COMPUTE_GC */
+    }
+
+    void sync()
+    {
+        sync_if_necessary<TensorType>();
+        sync_tensor_if_necessary<TensorType>(dst);
     }
 
     void teardown()
diff --git a/tests/benchmark/fixtures/GEMMFixture.h b/tests/benchmark/fixtures/GEMMFixture.h
index 0c41c67..e958d4f 100644
--- a/tests/benchmark/fixtures/GEMMFixture.h
+++ b/tests/benchmark/fixtures/GEMMFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -58,10 +58,6 @@
         b.allocator()->allocate();
         c.allocator()->allocate();
         dst.allocator()->allocate();
-
-        library->fill_tensor_uniform(Accessor(a), 0);
-        library->fill_tensor_uniform(Accessor(b), 1);
-        library->fill_tensor_uniform(Accessor(c), 2);
     }
 
     void run()
@@ -69,6 +65,12 @@
         gemm.run();
     }
 
+    void sync()
+    {
+        sync_if_necessary<TensorType>();
+        sync_tensor_if_necessary<TensorType>(dst);
+    }
+
     void teardown()
     {
         a.allocator()->free();
diff --git a/tests/benchmark/fixtures/GEMMInterleave4x4Fixture.h b/tests/benchmark/fixtures/GEMMInterleave4x4Fixture.h
new file mode 100644
index 0000000..793c540
--- /dev/null
+++ b/tests/benchmark/fixtures/GEMMInterleave4x4Fixture.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_GEMM_INTERLEAVE4X4_FIXTURE
+#define ARM_COMPUTE_TEST_GEMM_INTERLEAVE4X4_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/Globals.h"
+#include "tests/Utils.h"
+#include "tests/framework/Fixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+/** Benchmark fixture for a GEMM interleave 4x4 function; usable for NEON and CL */
+template <typename TensorType, typename Function, typename Accessor>
+class GEMMInterleave4x4Fixture : public framework::Fixture
+{
+public:
+    /** Describe tensor a of shape (x, y) and tensor b of shape (x * 4, ceil(y / 4)), configure, then allocate */
+    template <typename...>
+    void setup(size_t x, size_t y, DataType data_type)
+    {
+        constexpr int fixed_point_position = 4;
+        // Integer arithmetic: exact for every size_t, unlike a round trip through float, and needs no <cmath>
+        const TensorShape shape_a(x, y);
+        const TensorShape shape_b(x * 4, (y / 4) + ((y % 4 != 0) ? 1 : 0));
+        // Create tensors
+        a = create_tensor<TensorType>(shape_a, data_type, 1, fixed_point_position);
+        b = create_tensor<TensorType>(shape_b, data_type, 1, fixed_point_position);
+        // Create and configure function before the tensors are allocated
+        gemm.configure(&a, &b);
+        a.allocator()->allocate();
+        b.allocator()->allocate();
+    }
+
+    /** Execute the configured function */
+    void run()
+    {
+        gemm.run();
+    }
+
+    /** Run the synchronisation helpers; tensor b is handed to sync_tensor_if_necessary() */
+    void sync()
+    {
+        sync_if_necessary<TensorType>();
+        sync_tensor_if_necessary<TensorType>(b);
+    }
+
+    /** Free the memory of both tensors */
+    void teardown()
+    {
+        a.allocator()->free();
+        b.allocator()->free();
+    }
+
+private:
+    TensorType a{};
+    TensorType b{};
+    Function   gemm{};
+};
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_GEMM_INTERLEAVE4X4_FIXTURE */
diff --git a/tests/benchmark/fixtures/GEMMLowpFixture.h b/tests/benchmark/fixtures/GEMMLowpFixture.h
index 02448c3..e6eecf7 100644
--- a/tests/benchmark/fixtures/GEMMLowpFixture.h
+++ b/tests/benchmark/fixtures/GEMMLowpFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -60,16 +60,18 @@
         a.allocator()->allocate();
         b.allocator()->allocate();
         c.allocator()->allocate();
-
-        // Fill tensors
-        library->fill_tensor_uniform(Accessor(a), 0);
-        library->fill_tensor_uniform(Accessor(b), 1);
     }
     void run()
     {
         gemmlowp.run();
     }
 
+    void sync()
+    {
+        sync_if_necessary<TensorType>();
+        sync_tensor_if_necessary<TensorType>(c);
+    }
+
     void teardown()
     {
         a.allocator()->free();
diff --git a/tests/benchmark/fixtures/HarrisCornersFixture.h b/tests/benchmark/fixtures/HarrisCornersFixture.h
new file mode 100644
index 0000000..c7ce683
--- /dev/null
+++ b/tests/benchmark/fixtures/HarrisCornersFixture.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_HARRIS_CORNERS_FIXTURE
+#define ARM_COMPUTE_TEST_HARRIS_CORNERS_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/Globals.h"
+#include "tests/Utils.h"
+#include "tests/framework/Fixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+/** Benchmark fixture for a Harris corners function that runs on an image obtained from the assets library */
+template <typename TensorType, typename Function, typename Accessor, typename ArrayType>
+class HarrisCornersFixture : public framework::Fixture
+{
+public:
+    /** Load @p image, configure the function on a tensor of the same shape and copy the image into it */
+    template <typename...>
+    void setup(std::string image, Format format, float threshold, float min_dist, float sensitivity,
+               int32_t gradient_size, int32_t block_size,
+               BorderMode border_mode, bool use_fp16)
+    {
+        // Images loaded before are cached by the library; only a reference is held here
+        const RawTensor &image_data = library->get(image, format);
+        // The input tensor takes its shape from the loaded image
+        input = create_tensor<TensorType>(image_data.shape(), format);
+        // The output array member is handed to the function together with the parameters
+        harris.configure(&input, threshold, min_dist, sensitivity, gradient_size, block_size, &corners, border_mode, 0, use_fp16);
+        // Allocation follows configuration; the image data is copied in last
+        input.allocator()->allocate();
+        library->fill(Accessor(input), image_data);
+    }
+
+    /** Execute the configured function */
+    void run()
+    {
+        harris.run();
+    }
+
+    /** Run the generic synchronisation helper; no tensor-specific synchronisation is requested */
+    void sync()
+    {
+        sync_if_necessary<TensorType>();
+    }
+
+    /** Free the memory of the input tensor */
+    void teardown()
+    {
+        input.allocator()->free();
+    }
+
+private:
+    TensorType input{};
+    ArrayType  corners{ 20000 };
+    Function   harris{};
+};
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_HARRIS_CORNERS_FIXTURE */
\ No newline at end of file
diff --git a/tests/benchmark/fixtures/L2NormalizeLayerFixture.h b/tests/benchmark/fixtures/L2NormalizeLayerFixture.h
new file mode 100644
index 0000000..e68c5a7
--- /dev/null
+++ b/tests/benchmark/fixtures/L2NormalizeLayerFixture.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_L2_NORMALIZE_LAYER_FIXTURE
+#define ARM_COMPUTE_TEST_L2_NORMALIZE_LAYER_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/Globals.h"
+#include "tests/Utils.h"
+#include "tests/framework/Fixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+/** Benchmark fixture for an L2 normalize function; usable for NEON and CL */
+template <typename TensorType, typename Function, typename Accessor>
+class L2NormalizeLayerFixture : public framework::Fixture
+{
+public:
+    /** Describe source and destination with the same shape and data type, configure, then allocate */
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type, unsigned int axis)
+    {
+        // The function is configured with @p axis and a constant 1.f before the tensors are allocated
+        src = create_tensor<TensorType>(shape, data_type);
+        dst = create_tensor<TensorType>(shape, data_type);
+        l2norm_func.configure(&src, &dst, axis, 1.f);
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+    }
+
+    /** Execute the configured function */
+    void run()
+    {
+        l2norm_func.run();
+    }
+
+    /** Run the synchronisation helpers; the destination tensor is handed to sync_tensor_if_necessary() */
+    void sync()
+    {
+        sync_if_necessary<TensorType>();
+        sync_tensor_if_necessary<TensorType>(dst);
+    }
+
+    /** Free the memory of both tensors */
+    void teardown()
+    {
+        src.allocator()->free();
+        dst.allocator()->free();
+    }
+
+private:
+    TensorType src{};
+    TensorType dst{};
+    Function   l2norm_func{};
+};
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_L2_NORMALIZE_LAYER_FIXTURE */
diff --git a/tests/benchmark/fixtures/LeNet5Fixture.h b/tests/benchmark/fixtures/LeNet5Fixture.h
index 77a09d3..5108042 100644
--- a/tests/benchmark/fixtures/LeNet5Fixture.h
+++ b/tests/benchmark/fixtures/LeNet5Fixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -49,7 +49,6 @@
         network.init(batches);
         network.build();
         network.allocate();
-        network.fill_random();
     }
 
     void run()
@@ -57,6 +56,11 @@
         network.run();
     }
 
+    void sync()
+    {
+        network.sync();
+    }
+
     void teardown()
     {
         network.clear();
diff --git a/tests/benchmark/fixtures/MagnitudeFixture.h b/tests/benchmark/fixtures/MagnitudeFixture.h
new file mode 100644
index 0000000..f75540c
--- /dev/null
+++ b/tests/benchmark/fixtures/MagnitudeFixture.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_MAGNITUDE_FIXTURE
+#define ARM_COMPUTE_TEST_MAGNITUDE_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/Globals.h"
+#include "tests/Utils.h"
+#include "tests/framework/Fixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+/** Benchmark fixture for a magnitude function taking two inputs and one output of identical shape and format */
+template <typename TensorType, typename Function, typename Accessor>
+class MagnitudeFixture : public framework::Fixture
+{
+public:
+    /** Describe the three tensors, configure the function, then allocate the tensors */
+    template <typename...>
+    void setup(const TensorShape &shape, Format format, MagnitudeType magnitude_type, bool use_fp16)
+    {
+        input1 = create_tensor<TensorType>(shape, format);
+        input2 = create_tensor<TensorType>(shape, format);
+        output = create_tensor<TensorType>(shape, format);
+        magnitude.configure(&input1, &input2, &output, magnitude_type, use_fp16);
+        input1.allocator()->allocate();
+        input2.allocator()->allocate();
+        output.allocator()->allocate();
+    }
+
+    /** Execute the configured function */
+    void run()
+    {
+        magnitude.run();
+    }
+
+    /** Run the synchronisation helpers; the destination tensor is handed to sync_tensor_if_necessary() */
+    void sync()
+    {
+        sync_if_necessary<TensorType>();
+        sync_tensor_if_necessary<TensorType>(output);
+    }
+
+    /** Free the memory of all three tensors */
+    void teardown()
+    {
+        input1.allocator()->free();
+        input2.allocator()->free();
+        output.allocator()->free();
+    }
+
+private:
+    TensorType input1{};
+    TensorType input2{};
+    TensorType output{};
+    Function   magnitude{};
+};
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_MAGNITUDE_FIXTURE */
diff --git a/tests/benchmark/fixtures/MobileNetFixture.h b/tests/benchmark/fixtures/MobileNetFixture.h
index 660205c..d073bb4 100644
--- a/tests/benchmark/fixtures/MobileNetFixture.h
+++ b/tests/benchmark/fixtures/MobileNetFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,7 +50,6 @@
         network.init(batches);
         network.build();
         network.allocate();
-        network.fill_random();
     }
 
     void run()
@@ -58,6 +57,11 @@
         network.run();
     }
 
+    void sync()
+    {
+        network.sync();
+    }
+
     void teardown()
     {
         network.clear();
diff --git a/tests/benchmark/fixtures/MobileNetV1Fixture.h b/tests/benchmark/fixtures/MobileNetV1Fixture.h
index 07333dd..9b5020a 100644
--- a/tests/benchmark/fixtures/MobileNetV1Fixture.h
+++ b/tests/benchmark/fixtures/MobileNetV1Fixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -53,7 +53,6 @@
         network.init(InputSize, batches);
         network.build();
         network.allocate();
-        network.fill_random();
     }
 
     void run()
@@ -61,6 +60,11 @@
         network.run();
     }
 
+    void sync()
+    {
+        network.sync();
+    }
+
     void teardown()
     {
         network.clear();
diff --git a/tests/benchmark/fixtures/NormalizationLayerFixture.h b/tests/benchmark/fixtures/NormalizationLayerFixture.h
index 41dd8a7..00ab23f 100644
--- a/tests/benchmark/fixtures/NormalizationLayerFixture.h
+++ b/tests/benchmark/fixtures/NormalizationLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,9 +56,6 @@
         // Allocate tensors
         src.allocator()->allocate();
         dst.allocator()->allocate();
-
-        // Fill tensors
-        library->fill_tensor_uniform(Accessor(src), 0);
     }
 
     void run()
@@ -66,6 +63,12 @@
         norm_layer.run();
     }
 
+    void sync()
+    {
+        sync_if_necessary<TensorType>();
+        sync_tensor_if_necessary<TensorType>(dst);
+    }
+
     void teardown()
     {
         src.allocator()->free();
diff --git a/tests/benchmark/fixtures/NormalizePlanarYUVLayerFixture.h b/tests/benchmark/fixtures/NormalizePlanarYUVLayerFixture.h
new file mode 100644
index 0000000..af42ba7
--- /dev/null
+++ b/tests/benchmark/fixtures/NormalizePlanarYUVLayerFixture.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_NORMALIZEPLANARYUVLAYERFIXTURE
+#define ARM_COMPUTE_TEST_NORMALIZEPLANARYUVLAYERFIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/Globals.h"
+#include "tests/Utils.h"
+#include "tests/framework/Fixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+/** Benchmark fixture for a normalize planar YUV function; usable for NEON and CL */
+template <typename TensorType, typename Function, typename Accessor>
+class NormalizePlanarYUVLayerFixture : public framework::Fixture
+{
+public:
+    /** Describe the source, destination, mean and sd tensors, configure the function, then allocate */
+    template <typename...>
+    void setup(TensorShape tensor_shape, TensorShape param_shape, DataType data_type, int batches)
+    {
+        // Write the batch count into the first dimension past the current ones
+        tensor_shape.set(tensor_shape.num_dimensions(), batches);
+        // Source and destination use the batched shape, both parameter tensors use param_shape
+        input      = create_tensor<TensorType>(tensor_shape, data_type, 1);
+        output     = create_tensor<TensorType>(tensor_shape, data_type, 1);
+        mean_param = create_tensor<TensorType>(param_shape, data_type, 1);
+        sd_param   = create_tensor<TensorType>(param_shape, data_type, 1);
+        // Configuration happens before any memory is allocated
+        normalize.configure(&input, &output, &mean_param, &sd_param);
+        input.allocator()->allocate();
+        output.allocator()->allocate();
+        mean_param.allocator()->allocate();
+        sd_param.allocator()->allocate();
+    }
+
+    /** Execute the configured function */
+    void run()
+    {
+        normalize.run();
+    }
+
+    /** Run the synchronisation helpers; the destination tensor is handed to sync_tensor_if_necessary() */
+    void sync()
+    {
+        sync_if_necessary<TensorType>();
+        sync_tensor_if_necessary<TensorType>(output);
+    }
+
+    /** Free the memory of all four tensors */
+    void teardown()
+    {
+        input.allocator()->free();
+        output.allocator()->free();
+        mean_param.allocator()->free();
+        sd_param.allocator()->free();
+    }
+
+private:
+    TensorType input{};
+    TensorType output{};
+    TensorType mean_param{};
+    TensorType sd_param{};
+    Function   normalize{};
+};
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_NORMALIZEPLANARYUVLAYERFIXTURE */
diff --git a/tests/benchmark/fixtures/PoolingLayerFixture.h b/tests/benchmark/fixtures/PoolingLayerFixture.h
index 2060301..88edea3 100644
--- a/tests/benchmark/fixtures/PoolingLayerFixture.h
+++ b/tests/benchmark/fixtures/PoolingLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -57,9 +57,6 @@
         // Allocate tensors
         src.allocator()->allocate();
         dst.allocator()->allocate();
-
-        // Fill tensors
-        library->fill_tensor_uniform(Accessor(src), 0);
     }
 
     void run()
@@ -67,6 +64,12 @@
         pool_layer.run();
     }
 
+    void sync()
+    {
+        sync_if_necessary<TensorType>();
+        sync_tensor_if_necessary<TensorType>(dst);
+    }
+
     void teardown()
     {
         src.allocator()->free();
diff --git a/tests/benchmark/fixtures/QuantizationLayerFixture.h b/tests/benchmark/fixtures/QuantizationLayerFixture.h
new file mode 100644
index 0000000..4b2fc88
--- /dev/null
+++ b/tests/benchmark/fixtures/QuantizationLayerFixture.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_QUANTIZATION_LAYER_FIXTURE
+#define ARM_COMPUTE_TEST_QUANTIZATION_LAYER_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/Globals.h"
+#include "tests/Utils.h"
+#include "tests/framework/Fixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+/** Benchmark fixture for a quantization function; the destination tensor is always created as U8 */
+template <typename TensorType, typename Function, typename Accessor>
+class QuantizationLayerFixture : public framework::Fixture
+{
+public:
+    /** Describe source and destination with the same shape, configure the function, then allocate */
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type)
+    {
+        input     = create_tensor<TensorType>(shape, data_type);
+        quantized = create_tensor<TensorType>(shape, DataType::U8);
+        quantize.configure(&input, &quantized);
+        input.allocator()->allocate();
+        quantized.allocator()->allocate();
+    }
+
+    /** Execute the configured function */
+    void run()
+    {
+        quantize.run();
+    }
+
+    /** Run the synchronisation helpers; the destination tensor is handed to sync_tensor_if_necessary() */
+    void sync()
+    {
+        sync_if_necessary<TensorType>();
+        sync_tensor_if_necessary<TensorType>(quantized);
+    }
+
+    /** Free the memory of both tensors */
+    void teardown()
+    {
+        input.allocator()->free();
+        quantized.allocator()->free();
+    }
+
+private:
+    TensorType input{};
+    TensorType quantized{};
+    Function   quantize{};
+};
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_QUANTIZATION_LAYER_FIXTURE */
diff --git a/tests/benchmark/fixtures/ROIPoolingLayerFixture.h b/tests/benchmark/fixtures/ROIPoolingLayerFixture.h
index 76c2280..bdd416e 100644
--- a/tests/benchmark/fixtures/ROIPoolingLayerFixture.h
+++ b/tests/benchmark/fixtures/ROIPoolingLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -68,9 +68,6 @@
         // Allocate tensors
         src.allocator()->allocate();
         dst.allocator()->allocate();
-
-        // Fill tensors
-        library->fill_tensor_uniform(Accessor(src), 0);
     }
 
     void run()
@@ -78,6 +75,12 @@
         roi_pool.run();
     }
 
+    void sync()
+    {
+        sync_if_necessary<TensorType>();
+        sync_tensor_if_necessary<TensorType>(dst);
+    }
+
     void teardown()
     {
         src.allocator()->free();
diff --git a/tests/benchmark/fixtures/ReshapeLayerFixture.h b/tests/benchmark/fixtures/ReshapeLayerFixture.h
new file mode 100644
index 0000000..85a5b80
--- /dev/null
+++ b/tests/benchmark/fixtures/ReshapeLayerFixture.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_RESHAPEFIXTURE
+#define ARM_COMPUTE_TEST_RESHAPEFIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/Globals.h"
+#include "tests/Utils.h"
+#include "tests/framework/Fixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+/** Benchmark fixture for reshape functions that can be used for NEON and CL; the Accessor parameter is not used by this class */
+template <typename TensorType, typename Function, typename Accessor>
+class ReshapeLayerFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type)
+    {
+        // Create tensors: source and destination are given the same shape and data type
+        src = create_tensor<TensorType>(shape, data_type);
+        dst = create_tensor<TensorType>(shape, data_type);
+
+        // Configure the function on both tensors before any memory is allocated
+        reshape_func.configure(&src, &dst);
+
+        // Allocate tensors (this fixture does not fill them)
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+    }
+
+    void run()
+    {
+        reshape_func.run(); // Execute the function that was configured in setup()
+    }
+
+    void sync()
+    {
+        sync_if_necessary<TensorType>();           // Helper defined outside this file; presumably waits for queued backend work - TODO confirm
+        sync_tensor_if_necessary<TensorType>(dst); // Presumably syncs the output tensor for backends that need it - TODO confirm
+    }
+
+    void teardown()
+    {
+        src.allocator()->free(); // Release the memory allocated in setup()
+        dst.allocator()->free();
+    }
+
+private:
+    TensorType src{};          // Input tensor
+    TensorType dst{};          // Output tensor
+    Function   reshape_func{}; // Function being benchmarked
+};
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_RESHAPEFIXTURE */
diff --git a/tests/benchmark/fixtures/ScaleFixture.h b/tests/benchmark/fixtures/ScaleFixture.h
new file mode 100644
index 0000000..cd51f57
--- /dev/null
+++ b/tests/benchmark/fixtures/ScaleFixture.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_SCALE_FIXTURE
+#define ARM_COMPUTE_TEST_SCALE_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/Globals.h"
+#include "tests/Utils.h"
+#include "tests/framework/Fixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+/** Benchmark fixture for scale functions: resizes the input by a random factor per axis, capped to a maximum output size */
+template <typename TensorType, typename Function, typename Accessor>
+class ScaleFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy)
+    {
+        constexpr float max_width  = 8192.0f; // Cap on the scaled width
+        constexpr float max_height = 6384.0f; // Cap on the scaled height
+        std::mt19937                          generator(library->seed()); // Seed comes from the shared test library object
+        std::uniform_real_distribution<float> distribution_float(0.25f, 3.0f);
+        float                                 scale_x = distribution_float(generator);
+        float                                 scale_y = distribution_float(generator);
+
+        scale_x = ((shape.x() * scale_x) > max_width) ? (max_width / shape.x()) : scale_x;   // Limit the factor so the scaled width does not exceed max_width
+        scale_y = ((shape.y() * scale_y) > max_height) ? (max_height / shape.y()) : scale_y; // Limit the factor so the scaled height does not exceed max_height
+
+        std::uniform_int_distribution<unsigned int> distribution_u8(0, 255); // IntType must not be a char-sized type (undefined behaviour), so draw as unsigned int
+        uint8_t                                     constant_border_value = static_cast<uint8_t>(distribution_u8(generator)); // Drawn value is in [0, 255], so the cast is lossless
+
+        TensorShape shape_scaled(shape); // Start from the input shape so dimensions beyond the first two are kept
+        shape_scaled.set(0, shape[0] * scale_x);
+        shape_scaled.set(1, shape[1] * scale_y);
+
+        // Create tensors: the destination uses the scaled shape and the same data type as the source
+        src = create_tensor<TensorType>(shape, data_type);
+        dst = create_tensor<TensorType>(shape_scaled, data_type);
+
+        // Configure the function on both tensors before any memory is allocated
+        scale_func.configure(&src, &dst, policy, border_mode, constant_border_value, sampling_policy);
+
+        // Allocate tensors (this fixture does not fill them)
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+    }
+
+    void run()
+    {
+        scale_func.run(); // Execute the function that was configured in setup()
+    }
+
+    void sync()
+    {
+        sync_if_necessary<TensorType>();           // Helper defined outside this file; presumably waits for queued backend work - TODO confirm
+        sync_tensor_if_necessary<TensorType>(dst); // Presumably syncs the output tensor for backends that need it - TODO confirm
+    }
+
+    void teardown()
+    {
+        src.allocator()->free(); // Release the memory allocated in setup()
+        dst.allocator()->free();
+    }
+
+private:
+    TensorType src{};        // Input tensor
+    TensorType dst{};        // Output tensor (scaled shape)
+    Function   scale_func{}; // Function being benchmarked
+};
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_SCALE_FIXTURE */
diff --git a/tests/benchmark/fixtures/ScaleLayerFixture.h b/tests/benchmark/fixtures/ScaleLayerFixture.h
new file mode 100644
index 0000000..10568ea
--- /dev/null
+++ b/tests/benchmark/fixtures/ScaleLayerFixture.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_SCALELAYERFIXTURE
+#define ARM_COMPUTE_TEST_SCALELAYERFIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/Globals.h"
+#include "tests/Utils.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+/** Fixture that can be used for NEON, CL and OpenGL ES; T is the type the constant border value is converted to */
+template <typename TensorType, typename Function, typename Accessor, typename T>
+class ScaleLayerFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy, float sx, float sy, DataType data_type)
+    {
+        constexpr float max_width  = 8192.0f; // Cap on the scaled width
+        constexpr float max_height = 6384.0f; // Cap on the scaled height
+
+        std::mt19937 generator(library->seed()); // Only used below to draw the constant border value
+
+        float scale_x = ((shape.x() * sx) > max_width) ? (max_width / shape.x()) : sx;   // Use sx unless it would push the width beyond max_width
+        float scale_y = ((shape.y() * sy) > max_height) ? (max_height / shape.y()) : sy; // Use sy unless it would push the height beyond max_height
+
+        std::uniform_int_distribution<unsigned int> distribution_u8(0, 255); // IntType must not be a char-sized type (undefined behaviour), so draw as unsigned int
+        T                                           constant_border_value = static_cast<T>(distribution_u8(generator));
+
+        // Create tensors: the destination starts from the input shape with its first two dimensions scaled
+        src = create_tensor<TensorType>(shape, data_type);
+        TensorShape shape_scaled(shape);
+        shape_scaled.set(0, shape[0] * scale_x);
+        shape_scaled.set(1, shape[1] * scale_y);
+        dst = create_tensor<TensorType>(shape_scaled, data_type);
+
+        scale_layer.configure(&src, &dst, policy, border_mode, constant_border_value, sampling_policy);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); // Both tensors are expected to still be resizable before allocation
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors (this fixture does not fill them)
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+    }
+
+    void run()
+    {
+        scale_layer.run(); // Execute the function that was configured in setup()
+    }
+
+    void sync()
+    {
+        sync_if_necessary<TensorType>();           // Helper defined outside this file; presumably waits for queued backend work - TODO confirm
+        sync_tensor_if_necessary<TensorType>(dst); // Presumably syncs the output tensor for backends that need it - TODO confirm
+    }
+
+    void teardown()
+    {
+        src.allocator()->free(); // Release the memory allocated in setup()
+        dst.allocator()->free();
+    }
+
+private:
+    TensorType src{};         // Input tensor
+    TensorType dst{};         // Output tensor (scaled shape)
+    Function   scale_layer{}; // Function being benchmarked
+};
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_SCALELAYERFIXTURE */
diff --git a/tests/benchmark/fixtures/SoftmaxLayerFixture.h b/tests/benchmark/fixtures/SoftmaxLayerFixture.h
index 50b2726..95790ff 100644
--- a/tests/benchmark/fixtures/SoftmaxLayerFixture.h
+++ b/tests/benchmark/fixtures/SoftmaxLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -31,13 +31,6 @@
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Fixture.h"
 
-#ifdef ARM_COMPUTE_GC
-#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
-#include "tests/GLES_COMPUTE/Helper.h"
-
-using namespace arm_compute::test::gles_compute;
-#endif /* ARM_COMPUTE_GC */
-
 namespace arm_compute
 {
 namespace test
@@ -65,20 +58,17 @@
         // Allocate tensors
         src.allocator()->allocate();
         dst.allocator()->allocate();
-
-        // Fill tensors
-        library->fill_tensor_uniform(Accessor(src), 0);
     }
 
     void run()
     {
         smx_layer.run();
-#ifdef ARM_COMPUTE_GC
-        if(opengles31_is_available() && std::is_same<typename std::decay<TensorType>::type, arm_compute::GCTensor>::value)
-        {
-            force_sync_tensor(dst);
-        }
-#endif /* ARM_COMPUTE_GC */
+    }
+
+    void sync()
+    {
+        sync_if_necessary<TensorType>();           // Helper defined outside this file; presumably waits for queued backend work - TODO confirm
+        sync_tensor_if_necessary<TensorType>(dst); // Presumably syncs the output tensor for backends that need it - TODO confirm
     }
 
     void teardown()
diff --git a/tests/benchmark/fixtures/TransposeFixture.h b/tests/benchmark/fixtures/TransposeFixture.h
new file mode 100644
index 0000000..60643aa
--- /dev/null
+++ b/tests/benchmark/fixtures/TransposeFixture.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_TRANSPOSEFIXTURE
+#define ARM_COMPUTE_TEST_TRANSPOSEFIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/Globals.h"
+#include "tests/Utils.h"
+#include "tests/framework/Fixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+/** Benchmark fixture for transpose functions that can be used for NE/CL/GC; the Accessor parameter is not used by this class */
+template <typename TensorType, typename Function, typename Accessor>
+class TransposeFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type)
+    {
+        // Make rows the columns of the original shape; only the first two dimensions are carried over
+        TensorShape output_shape{ shape[1], shape[0] };
+
+        // Create tensors: the destination uses the swapped shape and the same data type as the source
+        src = create_tensor<TensorType>(shape, data_type);
+        dst = create_tensor<TensorType>(output_shape, data_type);
+
+        // Configure the function on both tensors before any memory is allocated
+        transpose.configure(&src, &dst);
+
+        // Allocate tensors (this fixture does not fill them)
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+    }
+
+    void run()
+    {
+        transpose.run(); // Execute the function that was configured in setup()
+    }
+
+    void sync()
+    {
+        sync_if_necessary<TensorType>();           // Helper defined outside this file; presumably waits for queued backend work - TODO confirm
+        sync_tensor_if_necessary<TensorType>(dst); // Presumably syncs the output tensor for backends that need it - TODO confirm
+    }
+
+    void teardown()
+    {
+        src.allocator()->free(); // Release the memory allocated in setup()
+        dst.allocator()->free();
+    }
+
+private:
+    TensorType src{};       // Input tensor
+    TensorType dst{};       // Output tensor (first two dimensions swapped)
+    Function   transpose{}; // Function being benchmarked
+};
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_TRANSPOSEFIXTURE */
diff --git a/tests/datasets/AlexNetGEMMDataset.h b/tests/datasets/AlexNetGEMMDataset.h
new file mode 100644
index 0000000..ac47fb6
--- /dev/null
+++ b/tests/datasets/AlexNetGEMMDataset.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_ALEXNET_GEMM_DATASET
+#define ARM_COMPUTE_TEST_ALEXNET_GEMM_DATASET
+
+#include "tests/datasets/GEMMDataset.h"
+
+#include "arm_compute/core/TensorShape.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace datasets
+{
+class AlexNetGEMMDataset final : public GEMMDataset // Fixed list of eight GEMM configurations; each entry is four shapes plus two scalars
+{
+public:
+    AlexNetGEMMDataset() // Registers every configuration at construction time
+    {
+        add_config(TensorShape{ 364U, 3025U }, TensorShape{ 96U, 364U }, TensorShape{ 96U, 3025U }, TensorShape{ 96U, 3025U }, 1.f, 0.f); // presumably (a, b, c, dst, alpha, beta) - confirm against GEMMDataset::add_config
+        add_config(TensorShape{ 1201U, 729U }, TensorShape{ 128U, 1201U }, TensorShape{ 128U, 729U }, TensorShape{ 128U, 729U }, 1.f, 0.f);
+        add_config(TensorShape{ 2305U, 169U }, TensorShape{ 384U, 2305U }, TensorShape{ 384U, 169U }, TensorShape{ 384U, 169U }, 1.f, 0.f);
+        add_config(TensorShape{ 1729U, 169U }, TensorShape{ 192U, 1729U }, TensorShape{ 192U, 169U }, TensorShape{ 192U, 169U }, 1.f, 0.f);
+        add_config(TensorShape{ 1729U, 169U }, TensorShape{ 128U, 1729U }, TensorShape{ 128U, 169U }, TensorShape{ 128U, 169U }, 1.f, 0.f);
+        add_config(TensorShape{ 9216U, 1U }, TensorShape{ 4096U, 9216U }, TensorShape{ 4096U, 1U }, TensorShape{ 4096U, 1U }, 1.f, 0.f); // From here on the first shape has a single row
+        add_config(TensorShape{ 4096U, 1U }, TensorShape{ 4096U, 4096U }, TensorShape{ 4096U, 1U }, TensorShape{ 4096U, 1U }, 1.f, 0.f);
+        add_config(TensorShape{ 4096U, 1U }, TensorShape{ 1000U, 4096U }, TensorShape{ 1000U, 1U }, TensorShape{ 1000U, 1U }, 1.f, 0.f);
+    }
+};
+} // namespace datasets
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_ALEXNET_GEMM_DATASET */
diff --git a/tests/datasets/DepthwiseConvolutionLayerDataset.h b/tests/datasets/DepthwiseConvolutionLayerDataset.h
index a2caba9..c5a9f96 100644
--- a/tests/datasets/DepthwiseConvolutionLayerDataset.h
+++ b/tests/datasets/DepthwiseConvolutionLayerDataset.h
@@ -38,18 +38,16 @@
 class DepthwiseConvolutionLayerDataset
 {
 public:
-    using type = std::tuple<TensorShape, TensorShape, TensorShape, TensorShape, PadStrideInfo>;
+    using type = std::tuple<TensorShape, TensorShape, TensorShape, PadStrideInfo>;
 
     struct iterator
     {
         iterator(std::vector<TensorShape>::const_iterator   src_it,
                  std::vector<TensorShape>::const_iterator   weights_it,
-                 std::vector<TensorShape>::const_iterator   biases_it,
                  std::vector<TensorShape>::const_iterator   dst_it,
                  std::vector<PadStrideInfo>::const_iterator infos_it)
             : _src_it{ std::move(src_it) },
               _weights_it{ std::move(weights_it) },
-              _biases_it{ std::move(biases_it) },
               _dst_it{ std::move(dst_it) },
               _infos_it{ std::move(infos_it) }
         {
@@ -60,7 +58,6 @@
             std::stringstream description;
             description << "In=" << *_src_it << ":";
             description << "Weights=" << *_weights_it << ":";
-            description << "Biases=" << *_biases_it << ":";
             description << "Out=" << *_dst_it << ":";
             description << "Info=" << *_infos_it;
             return description.str();
@@ -68,14 +65,13 @@
 
         DepthwiseConvolutionLayerDataset::type operator*() const
         {
-            return std::make_tuple(*_src_it, *_weights_it, *_biases_it, *_dst_it, *_infos_it);
+            return std::make_tuple(*_src_it, *_weights_it, *_dst_it, *_infos_it); // Order matches `type`: source, weights, destination, pad/stride info
         }
 
         iterator &operator++()
         {
             ++_src_it;
             ++_weights_it;
-            ++_biases_it;
             ++_dst_it;
             ++_infos_it;
 
@@ -85,26 +81,24 @@
     private:
         std::vector<TensorShape>::const_iterator   _src_it;
         std::vector<TensorShape>::const_iterator   _weights_it;
-        std::vector<TensorShape>::const_iterator   _biases_it;
         std::vector<TensorShape>::const_iterator   _dst_it;
         std::vector<PadStrideInfo>::const_iterator _infos_it;
     };
 
     iterator begin() const
     {
-        return iterator(_src_shapes.begin(), _weight_shapes.begin(), _biases_shapes.begin(), _dst_shapes.begin(), _infos.begin());
+        return iterator(_src_shapes.begin(), _weight_shapes.begin(), _dst_shapes.begin(), _infos.begin()); // Iterator positioned at the first element of each parallel vector
     }
 
     int size() const
     {
-        return std::min(_src_shapes.size(), std::min(_weight_shapes.size(), std::min(_biases_shapes.size(), std::min(_dst_shapes.size(), _infos.size()))));
+        return std::min(_src_shapes.size(), std::min(_weight_shapes.size(), std::min(_dst_shapes.size(), _infos.size()))); // Length of the shortest of the four parallel vectors
     }
 
-    void add_config(TensorShape src, TensorShape weights, TensorShape biases, TensorShape dst, PadStrideInfo info)
+    void add_config(TensorShape src, TensorShape weights, TensorShape dst, PadStrideInfo info) // Appends one configuration; each argument is moved into its own parallel vector
     {
         _src_shapes.emplace_back(std::move(src));
         _weight_shapes.emplace_back(std::move(weights));
-        _biases_shapes.emplace_back(std::move(biases));
         _dst_shapes.emplace_back(std::move(dst));
         _infos.emplace_back(std::move(info));
     }
@@ -116,7 +110,6 @@
 private:
     std::vector<TensorShape>   _src_shapes{};
     std::vector<TensorShape>   _weight_shapes{};
-    std::vector<TensorShape>   _biases_shapes{};
     std::vector<TensorShape>   _dst_shapes{};
     std::vector<PadStrideInfo> _infos{};
 };
@@ -125,20 +118,20 @@
 public:
     SmallDepthwiseConvolutionLayerDataset()
     {
-        add_config(TensorShape(7U, 7U, 3U), TensorShape(3U, 3U, 3U), TensorShape(3U), TensorShape(5U, 5U, 3U), PadStrideInfo(1, 1, 0, 0));
-        add_config(TensorShape(23U, 27U, 5U), TensorShape(3U, 5U, 5U), TensorShape(5U), TensorShape(11U, 23U, 5U), PadStrideInfo(2, 1, 0, 0));
-        add_config(TensorShape(33U, 27U, 7U), TensorShape(7U, 3U, 7U), TensorShape(7U), TensorShape(10U, 13U, 7U), PadStrideInfo(3, 2, 1, 0));
-        add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(31U, 14U, 11U), PadStrideInfo(1, 2, 0, 1));
-        add_config(TensorShape(17U, 31U, 2U), TensorShape(5U, 9U, 2U), TensorShape(2U), TensorShape(15U, 13U, 2U), PadStrideInfo(1, 2, 1, 1));
-        add_config(TensorShape(23U, 27U, 5U), TensorShape(11U, 3U, 5U), TensorShape(5U), TensorShape(13U, 13U, 5U), PadStrideInfo(1, 2, 0, 0));
-        add_config(TensorShape(17U, 31U, 2U, 3U), TensorShape(5U, 9U, 2U), TensorShape(2U), TensorShape(15U, 13U, 2U, 3U), PadStrideInfo(1, 2, 1, 1));
+        add_config(TensorShape(7U, 7U, 3U), TensorShape(3U, 3U, 3U), TensorShape(5U, 5U, 3U), PadStrideInfo(1, 1, 0, 0));
+        add_config(TensorShape(23U, 27U, 5U), TensorShape(3U, 5U, 5U), TensorShape(11U, 23U, 5U), PadStrideInfo(2, 1, 0, 0));
+        add_config(TensorShape(33U, 27U, 7U), TensorShape(7U, 3U, 7U), TensorShape(10U, 13U, 7U), PadStrideInfo(3, 2, 1, 0));
+        add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(31U, 14U, 11U), PadStrideInfo(1, 2, 0, 1));
+        add_config(TensorShape(17U, 31U, 2U), TensorShape(5U, 9U, 2U), TensorShape(15U, 13U, 2U), PadStrideInfo(1, 2, 1, 1));
+        add_config(TensorShape(23U, 27U, 5U), TensorShape(11U, 3U, 5U), TensorShape(13U, 13U, 5U), PadStrideInfo(1, 2, 0, 0));
+        add_config(TensorShape(17U, 31U, 2U, 3U), TensorShape(5U, 9U, 2U), TensorShape(15U, 13U, 2U, 3U), PadStrideInfo(1, 2, 1, 1));
         // Asymmetric padding
-        add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 1, 2, 0, DimensionRoundingType::FLOOR));
-        add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 1, 0, 2, DimensionRoundingType::FLOOR));
-        add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 2, 1, 2, 0, DimensionRoundingType::FLOOR));
-        add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR));
-        add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(10U, 11U, 7U), PadStrideInfo(3, 2, 1, 0, 1, 0, DimensionRoundingType::FLOOR));
-        add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(10U, 11U, 7U), PadStrideInfo(3, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR));
+        add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 1, 2, 0, DimensionRoundingType::FLOOR));
+        add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 1, 0, 2, DimensionRoundingType::FLOOR));
+        add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 2, 1, 2, 0, DimensionRoundingType::FLOOR));
+        add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR));
+        add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(10U, 11U, 7U), PadStrideInfo(3, 2, 1, 0, 1, 0, DimensionRoundingType::FLOOR));
+        add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(10U, 11U, 7U), PadStrideInfo(3, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR));
     }
 };
 
@@ -147,12 +140,12 @@
 public:
     LargeDepthwiseConvolutionLayerDataset()
     {
-        add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(116U, 275U, 55U), PadStrideInfo(2, 1, 0, 0));
-        add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(111U, 138U, 77U), PadStrideInfo(3, 2, 1, 0));
-        add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(177U, 156U, 22U), PadStrideInfo(1, 2, 1, 1));
-        add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(231U, 138U, 55U), PadStrideInfo(1, 2, 0, 0));
-        add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(166U, 93U, 77U), PadStrideInfo(2, 3, 0, 1));
-        add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(89U, 311U, 22U), PadStrideInfo(2, 1, 1, 1));
+        add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(116U, 275U, 55U), PadStrideInfo(2, 1, 0, 0));
+        add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(111U, 138U, 77U), PadStrideInfo(3, 2, 1, 0));
+        add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(177U, 156U, 22U), PadStrideInfo(1, 2, 1, 1));
+        add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(231U, 138U, 55U), PadStrideInfo(1, 2, 0, 0));
+        add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(166U, 93U, 77U), PadStrideInfo(2, 3, 0, 1));
+        add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(89U, 311U, 22U), PadStrideInfo(2, 1, 1, 1));
     }
 };
 
@@ -161,10 +154,10 @@
 public:
     SmallDepthwiseConvolutionLayerDataset3x3()
     {
-        add_config(TensorShape(7U, 7U, 3U, 2U), TensorShape(3U, 3U, 3U), TensorShape(3U), TensorShape(5U, 5U, 3U, 2U), PadStrideInfo(1, 1, 0, 0));
-        add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(11U, 14U, 11U), PadStrideInfo(3, 2, 1, 1));
-        add_config(TensorShape(21U, 31U, 9U, 4U), TensorShape(3U, 3U, 9U), TensorShape(9U), TensorShape(21U, 15U, 9U, 4U), PadStrideInfo(1, 2, 1, 0));
-        add_config(TensorShape(33U, 27U, 11U, 3U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(31U, 14U, 11U, 3U), PadStrideInfo(1, 2, 0, 1));
+        add_config(TensorShape(7U, 7U, 3U, 2U), TensorShape(3U, 3U, 3U), TensorShape(5U, 5U, 3U, 2U), PadStrideInfo(1, 1, 0, 0));
+        add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U, 14U, 11U), PadStrideInfo(3, 2, 1, 1));
+        add_config(TensorShape(21U, 31U, 9U, 4U), TensorShape(3U, 3U, 9U), TensorShape(21U, 15U, 9U, 4U), PadStrideInfo(1, 2, 1, 0));
+        add_config(TensorShape(33U, 27U, 11U, 3U), TensorShape(3U, 3U, 11U), TensorShape(31U, 14U, 11U, 3U), PadStrideInfo(1, 2, 0, 1));
     }
 };
 
@@ -173,12 +166,12 @@
 public:
     LargeDepthwiseConvolutionLayerDataset3x3()
     {
-        add_config(TensorShape(233U, 277U, 55U, 3U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(116U, 275U, 55U, 3U), PadStrideInfo(2, 1, 0, 0));
-        add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(111U, 138U, 77U), PadStrideInfo(3, 2, 1, 0));
-        add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(177U, 156U, 22U), PadStrideInfo(1, 2, 1, 1));
-        add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(231U, 138U, 55U), PadStrideInfo(1, 2, 0, 0));
-        add_config(TensorShape(333U, 277U, 77U, 5U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(166U, 93U, 77U, 5U), PadStrideInfo(2, 3, 0, 1));
-        add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(89U, 311U, 22U), PadStrideInfo(2, 1, 1, 1));
+        add_config(TensorShape(233U, 277U, 55U, 3U), TensorShape(3U, 3U, 55U), TensorShape(116U, 275U, 55U, 3U), PadStrideInfo(2, 1, 0, 0));
+        add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(111U, 138U, 77U), PadStrideInfo(3, 2, 1, 0));
+        add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(177U, 156U, 22U), PadStrideInfo(1, 2, 1, 1));
+        add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(231U, 138U, 55U), PadStrideInfo(1, 2, 0, 0));
+        add_config(TensorShape(333U, 277U, 77U, 5U), TensorShape(3U, 3U, 77U), TensorShape(166U, 93U, 77U, 5U), PadStrideInfo(2, 3, 0, 1));
+        add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(89U, 311U, 22U), PadStrideInfo(2, 1, 1, 1));
     }
 };
 } // namespace datasets
diff --git a/tests/datasets/HOGDescriptorDataset.h b/tests/datasets/HOGDescriptorDataset.h
new file mode 100644
index 0000000..73c6494
--- /dev/null
+++ b/tests/datasets/HOGDescriptorDataset.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_HOG_DESCRIPTOR_DATASET
+#define ARM_COMPUTE_TEST_HOG_DESCRIPTOR_DATASET
+
+#include "utils/TypePrinter.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace datasets
+{
+class HOGDescriptorDataset
+{
+    // Base dataset whose elements pair an image file name with a HOGInfo.
+public:
+    using type = std::tuple<std::string, HOGInfo>;
+
+    struct iterator // steps through the image names and the HOG infos in lockstep
+    {
+        iterator(std::vector<std::string>::const_iterator image_it,
+                 std::vector<HOGInfo>::const_iterator     hog_info_it)
+            : _name_pos{ std::move(image_it) }, _info_pos{ std::move(hog_info_it) }
+        {
+        }
+
+        // Label of the current element, formatted as "Image=<name>:HOGInfo=<info>".
+        std::string description() const
+        {
+            std::stringstream label;
+            label << "Image=" << *_name_pos << ":"
+                  << "HOGInfo=" << *_info_pos;
+            return label.str();
+        }
+
+        HOGDescriptorDataset::type operator*() const
+        {
+            return std::make_tuple(*_name_pos, *_info_pos);
+        }
+
+        iterator &operator++()
+        {
+            ++_name_pos;
+            ++_info_pos;
+            return *this;
+        }
+
+    private:
+        std::vector<std::string>::const_iterator _name_pos;
+        std::vector<HOGInfo>::const_iterator     _info_pos;
+    };
+
+    iterator begin() const
+    {
+        return iterator{ _image.begin(), _hog_info.begin() };
+    }
+
+    int size() const
+    {
+        return static_cast<int>(std::min(_image.size(), _hog_info.size())); // shorter of the two lists
+    }
+
+    // Stores the image name and a HOGInfo built from the remaining arguments as one new element.
+    void add_config(std::string image,
+                    Size2D cell_size, Size2D block_size, Size2D detection_window_size, Size2D block_stride,
+                    size_t num_bins, HOGNormType normalization_type, float l2_hyst_threshold, PhaseType phase_type)
+    {
+        _image.emplace_back(std::move(image));
+        _hog_info.emplace_back(cell_size, block_size, detection_window_size, block_stride, num_bins, normalization_type, l2_hyst_threshold, phase_type);
+    }
+
+protected:
+    HOGDescriptorDataset()                        = default;
+    HOGDescriptorDataset(HOGDescriptorDataset &&) = default;
+
+private:
+    std::vector<std::string> _image{};
+    std::vector<HOGInfo>     _hog_info{};
+};
+
+// *INDENT-OFF*
+// clang-format off
+class SmallHOGDescriptorDataset final : public HOGDescriptorDataset
+{
+public:
+    SmallHOGDescriptorDataset() // two configurations on 800x600.ppm that differ only in the phase type
+    {
+        //         image          cell_size       block_size        detection_size     block_stride    bin  normalization_type       thresh phase_type
+        add_config("800x600.ppm", Size2D(8U, 8U), Size2D(16U, 16U), Size2D(64U, 128U), Size2D(8U, 8U), 9U,  HOGNormType::L2HYS_NORM, 0.2f,  PhaseType::SIGNED);
+        add_config("800x600.ppm", Size2D(8U, 8U), Size2D(16U, 16U), Size2D(64U, 128U), Size2D(8U, 8U), 9U,  HOGNormType::L2HYS_NORM, 0.2f,  PhaseType::UNSIGNED);
+    }
+};
+
+class LargeHOGDescriptorDataset final : public HOGDescriptorDataset
+{
+public:
+    LargeHOGDescriptorDataset() // 1920x1080.ppm with three norm types (L2HYS, L2, L1): first signed phase, then unsigned phase
+    {
+        //         image            cell_size       block_size        detection_size     block_stride    bin  normalization_type       thresh phase_type
+        add_config("1920x1080.ppm", Size2D(8U, 8U), Size2D(16U, 16U), Size2D(64U, 128U), Size2D(8U, 8U), 9U,  HOGNormType::L2HYS_NORM, 0.2f,  PhaseType::SIGNED);
+        add_config("1920x1080.ppm", Size2D(8U, 8U), Size2D(16U, 16U), Size2D(64U, 128U), Size2D(8U, 8U), 9U,  HOGNormType::L2_NORM,    0.2f,  PhaseType::SIGNED);
+        add_config("1920x1080.ppm", Size2D(8U, 8U), Size2D(16U, 16U), Size2D(64U, 128U), Size2D(8U, 8U), 9U,  HOGNormType::L1_NORM,    0.2f,  PhaseType::SIGNED);
+
+        add_config("1920x1080.ppm", Size2D(8U, 8U), Size2D(16U, 16U), Size2D(64U, 128U), Size2D(8U, 8U), 9U,  HOGNormType::L2HYS_NORM, 0.2f,  PhaseType::UNSIGNED);
+        add_config("1920x1080.ppm", Size2D(8U, 8U), Size2D(16U, 16U), Size2D(64U, 128U), Size2D(8U, 8U), 9U,  HOGNormType::L2_NORM,    0.2f,  PhaseType::UNSIGNED);
+        add_config("1920x1080.ppm", Size2D(8U, 8U), Size2D(16U, 16U), Size2D(64U, 128U), Size2D(8U, 8U), 9U,  HOGNormType::L1_NORM,    0.2f,  PhaseType::UNSIGNED);
+    }
+};
+// clang-format on
+// *INDENT-ON*
+
+} // namespace datasets
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_HOG_DESCRIPTOR_DATASET */
diff --git a/tests/datasets/ImageFileDatasets.h b/tests/datasets/ImageFileDatasets.h
new file mode 100644
index 0000000..90a7f0d
--- /dev/null
+++ b/tests/datasets/ImageFileDatasets.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_IMAGE_FILE_DATASET
+#define ARM_COMPUTE_TEST_IMAGE_FILE_DATASET
+
+#include "tests/framework/datasets/Datasets.h"
+
+#include <type_traits>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace datasets
+{
+class ImageFileDataset
+{
+    // Base dataset exposing a list of image file names, one name per element.
+public:
+    struct iterator
+    {
+        iterator(std::vector<std::string>::const_iterator name_it)
+            : _pos{ std::move(name_it) }
+        {
+        }
+
+        // Label of the current element, formatted as "ImageFile=<name>".
+        std::string description() const
+        {
+            return "ImageFile=" + *_pos;
+        }
+
+        std::tuple<std::string> operator*() const
+        {
+            return std::tuple<std::string>(*_pos);
+        }
+
+        iterator &operator++()
+        {
+            ++_pos;
+            return *this;
+        }
+
+    private:
+        std::vector<std::string>::const_iterator _pos;
+    };
+
+    iterator begin() const
+    {
+        return iterator{ _names.begin() };
+    }
+
+    int size() const
+    {
+        return static_cast<int>(_names.size());
+    }
+
+    // Appends @p name to the list of file names served by this dataset.
+    void add_image_file(std::string name)
+    {
+        _names.push_back(std::move(name));
+    }
+
+protected:
+    ImageFileDataset()                    = default;
+    ImageFileDataset(ImageFileDataset &&) = default;
+
+private:
+    std::vector<std::string> _names{};
+};
+
+/** Data set containing names of small image files (640x480, 800x600 and 1280x720 PPM files). */
+class SmallImageFiles final : public ImageFileDataset
+{
+public:
+    SmallImageFiles() // only the file names are registered here
+    {
+        add_image_file("640x480.ppm");
+        add_image_file("800x600.ppm");
+        add_image_file("1280x720.ppm");
+    }
+};
+
+/** Data set containing names of large image files (1920x1080, 4160x3120 and 5120x3200 PPM files). */
+class LargeImageFiles final : public ImageFileDataset
+{
+public:
+    LargeImageFiles() // only the file names are registered here
+    {
+        add_image_file("1920x1080.ppm");
+        add_image_file("4160x3120.ppm");
+        add_image_file("5120x3200.ppm");
+    }
+};
+
+} // namespace datasets
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_IMAGE_FILE_DATASET */
diff --git a/tests/datasets/NormalizePlanarYUVLayerDataset.h b/tests/datasets/NormalizePlanarYUVLayerDataset.h
new file mode 100644
index 0000000..2d71a56
--- /dev/null
+++ b/tests/datasets/NormalizePlanarYUVLayerDataset.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_NORMALIZE_PLANAR_YUV_LAYER_DATASET
+#define ARM_COMPUTE_TEST_NORMALIZE_PLANAR_YUV_LAYER_DATASET
+
+#include "utils/TypePrinter.h"
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace datasets
+{
+class NormalizePlanarYUVLayerDataset
+{
+    // Base dataset whose elements pair a tensor shape with a parameter shape.
+public:
+    using type = std::tuple<TensorShape, TensorShape>;
+
+    struct iterator
+    {
+        iterator(std::vector<TensorShape>::const_iterator tensor_it,
+                 std::vector<TensorShape>::const_iterator param_it)
+            : _shape_pos{ std::move(tensor_it) }, _param_pos{ std::move(param_it) }
+        {
+        }
+
+        // The tensor shape is printed as In and Out, the parameter shape as Mean and Sd; the label ends with ':'.
+        std::string description() const
+        {
+            std::stringstream label;
+            label << "In=" << *_shape_pos << ":"
+                  << "Out=" << *_shape_pos << ":"
+                  << "Mean=" << *_param_pos << ":"
+                  << "Sd=" << *_param_pos << ":";
+            return label.str();
+        }
+
+        NormalizePlanarYUVLayerDataset::type operator*() const
+        {
+            return std::make_tuple(*_shape_pos, *_param_pos);
+        }
+
+        iterator &operator++()
+        {
+            ++_shape_pos;
+            ++_param_pos;
+            return *this;
+        }
+
+    private:
+        std::vector<TensorShape>::const_iterator _shape_pos;
+        std::vector<TensorShape>::const_iterator _param_pos;
+    };
+
+    iterator begin() const
+    {
+        return iterator{ _tensor_shapes.begin(), _param_shapes.begin() };
+    }
+
+    int size() const
+    {
+        return static_cast<int>(std::min(_tensor_shapes.size(), _param_shapes.size()));
+    }
+
+    void add_config(TensorShape tensor, TensorShape param)
+    {
+        _tensor_shapes.push_back(std::move(tensor));
+        _param_shapes.push_back(std::move(param));
+    }
+
+protected:
+    NormalizePlanarYUVLayerDataset()                                  = default;
+    NormalizePlanarYUVLayerDataset(NormalizePlanarYUVLayerDataset &&) = default;
+
+private:
+    std::vector<TensorShape> _tensor_shapes{};
+    std::vector<TensorShape> _param_shapes{};
+};
+} // namespace datasets
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_NORMALIZE_PLANAR_YUV_LAYER_DATASET */
diff --git a/tests/datasets/MobileNetDepthwiseSeparableConvolutionLayerDataset.h b/tests/datasets/RandomNormalizePlanarYUVLayerDataset.h
similarity index 63%
copy from tests/datasets/MobileNetDepthwiseSeparableConvolutionLayerDataset.h
copy to tests/datasets/RandomNormalizePlanarYUVLayerDataset.h
index eef0000..5693004 100644
--- a/tests/datasets/MobileNetDepthwiseSeparableConvolutionLayerDataset.h
+++ b/tests/datasets/RandomNormalizePlanarYUVLayerDataset.h
@@ -21,10 +21,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_TEST_MOBILENET_DEPTHWISE_SEPARABLE_CONVOLUTION_LAYER_DATASET
-#define ARM_COMPUTE_TEST_MOBILENET_DEPTHWISE_SEPARABLE_CONVOLUTION_LAYER_DATASET
+#ifndef ARM_COMPUTE_TEST_RANDOM_NORMALIZE_PLANAR_YUV_LAYER_DATASET
+#define ARM_COMPUTE_TEST_RANDOM_NORMALIZE_PLANAR_YUV_LAYER_DATASET
 
-#include "tests/datasets/DepthwiseSeparableConvolutionLayerDataset.h"
+#include "tests/datasets/NormalizePlanarYUVLayerDataset.h"
 
 #include "utils/TypePrinter.h"
 
@@ -37,16 +37,18 @@
 {
 namespace datasets
 {
-class MobileNetDepthwiseSeparableConvolutionLayerDataset final : public DepthwiseSeparableConvolutionLayerDataset
+class RandomNormalizePlanarYUVLayerDataset final : public NormalizePlanarYUVLayerDataset
 {
 public:
-    MobileNetDepthwiseSeparableConvolutionLayerDataset()
+    RandomNormalizePlanarYUVLayerDataset() // each parameter shape has as many elements as the tensor's third dimension
     {
-        add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U), TensorShape(512U), TensorShape(14U, 14U, 512U), TensorShape(1U, 1U, 512U, 512U), TensorShape(512U), TensorShape(14U, 14U, 512U),
-                   PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::FLOOR), PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::FLOOR));
+        add_config(TensorShape(15U, 4U, 4U, 1U), TensorShape(4U));
+        add_config(TensorShape(21U, 11U, 12U, 1U), TensorShape(12U));
+        add_config(TensorShape(7U, 3U, 6U, 1U), TensorShape(6U));
+        add_config(TensorShape(7U, 2U, 3U, 1U), TensorShape(3U));
     }
 };
 } // namespace datasets
 } // namespace test
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_MOBILENET_DEPTHWISE_SEPARABLE_CONVOLUTION_LAYER_DATASET */
+#endif /* ARM_COMPUTE_TEST_RANDOM_NORMALIZE_PLANAR_YUV_LAYER_DATASET */
diff --git a/tests/datasets/ScaleLayerDataset.h b/tests/datasets/ScaleLayerDataset.h
new file mode 100644
index 0000000..8b5fbbe
--- /dev/null
+++ b/tests/datasets/ScaleLayerDataset.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_SCALE_LAYER_DATASET
+#define ARM_COMPUTE_TEST_SCALE_LAYER_DATASET
+
+#include "utils/TypePrinter.h"
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace datasets
+{
+class ScaleLayerDataset
+{
+    // Base dataset of scale configurations: source shape, interpolation policy, border mode, sampling policy and x/y scale.
+public:
+    using type = std::tuple<TensorShape, InterpolationPolicy, BorderMode, SamplingPolicy, float, float>;
+
+    struct iterator
+    {
+        iterator(std::vector<TensorShape>::const_iterator         src_it,
+                 std::vector<InterpolationPolicy>::const_iterator policy_it,
+                 std::vector<BorderMode>::const_iterator          border_mode_it,
+                 std::vector<SamplingPolicy>::const_iterator      sampling_policy_it,
+                 std::vector<float>::const_iterator               scale_x_it,
+                 std::vector<float>::const_iterator               scale_y_it)
+            : _src{ std::move(src_it) }, _interp{ std::move(policy_it) },
+              _border{ std::move(border_mode_it) }, _sampling{ std::move(sampling_policy_it) },
+              _sx{ std::move(scale_x_it) }, _sy{ std::move(scale_y_it) }
+        {
+        }
+
+        // One "Key=value" field per member, joined with ':' and without a trailing separator.
+        std::string description() const
+        {
+            std::stringstream label;
+            label << "In=" << *_src << ":"
+                  << "InterpolationPolicy=" << *_interp << ":"
+                  << "BorderMode=" << *_border << ":"
+                  << "SamplingPolicy=" << *_sampling << ":"
+                  << "Scale_x=" << *_sx << ":"
+                  << "Scale_y=" << *_sy;
+            return label.str();
+        }
+
+        ScaleLayerDataset::type operator*() const
+        {
+            return std::make_tuple(*_src, *_interp, *_border, *_sampling, *_sx, *_sy);
+        }
+
+        iterator &operator++()
+        {
+            ++_src;
+            ++_interp;
+            ++_border;
+            ++_sampling;
+            ++_sx;
+            ++_sy;
+            return *this;
+        }
+
+    private:
+        std::vector<TensorShape>::const_iterator         _src;
+        std::vector<InterpolationPolicy>::const_iterator _interp;
+        std::vector<BorderMode>::const_iterator          _border;
+        std::vector<SamplingPolicy>::const_iterator      _sampling;
+        std::vector<float>::const_iterator               _sx;
+        std::vector<float>::const_iterator               _sy;
+    };
+
+    iterator begin() const
+    {
+        return iterator{ _src_shapes.begin(), _policy.begin(), _border_mode.begin(), _sampling_policy.begin(), _scale_x.begin(), _scale_y.begin() };
+    }
+
+    int size() const
+    {
+        // Every list receives one entry per add_config() call; report the shortest one.
+        return static_cast<int>(std::min({ _src_shapes.size(), _policy.size(), _border_mode.size(), _sampling_policy.size(), _scale_x.size(), _scale_y.size() }));
+    }
+
+    // Appends one configuration, storing each argument in its own list.
+    void add_config(TensorShape src, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy, float scale_x, float scale_y)
+    {
+        _src_shapes.emplace_back(std::move(src));
+        _policy.push_back(policy);
+        _border_mode.push_back(border_mode);
+        _sampling_policy.push_back(sampling_policy);
+        _scale_x.push_back(scale_x);
+        _scale_y.push_back(scale_y);
+    }
+
+protected:
+    ScaleLayerDataset()                     = default;
+    ScaleLayerDataset(ScaleLayerDataset &&) = default;
+
+private:
+    std::vector<TensorShape>         _src_shapes{};
+    std::vector<InterpolationPolicy> _policy{};
+    std::vector<BorderMode>          _border_mode{};
+    std::vector<SamplingPolicy>      _sampling_policy{};
+    std::vector<float>               _scale_x{};
+    std::vector<float>               _scale_y{};
+};
+
+/** Data set containing small scale layer configurations: three 4D shapes with equal x/y scale values of 5, 7 and 9. */
+class SmallScaleLayerShapes final : public ScaleLayerDataset
+{
+public:
+    SmallScaleLayerShapes() // all entries use NEAREST_NEIGHBOR, BorderMode::UNDEFINED and SamplingPolicy::CENTER
+    {
+        add_config(TensorShape(128U, 64U, 1U, 3U), InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED, SamplingPolicy::CENTER, 5, 5);
+        add_config(TensorShape(9U, 9U, 3U, 4U), InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED, SamplingPolicy::CENTER, 7, 7);
+        add_config(TensorShape(27U, 13U, 2U, 4U), InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED, SamplingPolicy::CENTER, 9, 9);
+    }
+};
+
+/** Data set containing large scale layer configurations: four shapes, each combined with equal x/y scale values of 0.5, 2 and 3. */
+class LargeScaleLayerShapes final : public ScaleLayerDataset
+{
+public:
+    LargeScaleLayerShapes() // all entries use NEAREST_NEIGHBOR, BorderMode::UNDEFINED and SamplingPolicy::CENTER
+    {
+        add_config(TensorShape(1920U, 1080U), InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED, SamplingPolicy::CENTER, 0.5, 0.5);
+        add_config(TensorShape(640U, 480U, 2U, 3U), InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED, SamplingPolicy::CENTER, 0.5, 0.5);
+        add_config(TensorShape(4160U, 3120U), InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED, SamplingPolicy::CENTER, 0.5, 0.5);
+        add_config(TensorShape(800U, 600U, 1U, 4U), InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED, SamplingPolicy::CENTER, 0.5, 0.5);
+
+        add_config(TensorShape(1920U, 1080U), InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED, SamplingPolicy::CENTER, 2, 2);
+        add_config(TensorShape(640U, 480U, 2U, 3U), InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED, SamplingPolicy::CENTER, 2, 2);
+        add_config(TensorShape(4160U, 3120U), InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED, SamplingPolicy::CENTER, 2, 2);
+        add_config(TensorShape(800U, 600U, 1U, 4U), InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED, SamplingPolicy::CENTER, 2, 2);
+
+        add_config(TensorShape(1920U, 1080U), InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED, SamplingPolicy::CENTER, 3, 3);
+        add_config(TensorShape(640U, 480U, 2U, 3U), InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED, SamplingPolicy::CENTER, 3, 3);
+        add_config(TensorShape(4160U, 3120U), InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED, SamplingPolicy::CENTER, 3, 3);
+        add_config(TensorShape(800U, 600U, 1U, 4U), InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED, SamplingPolicy::CENTER, 3, 3);
+    }
+};
+
+} // namespace datasets
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_SCALE_LAYER_DATASET */
diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h
index c9e5510..a5e03c7 100644
--- a/tests/datasets/ShapeDatasets.h
+++ b/tests/datasets/ShapeDatasets.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -239,7 +239,7 @@
     SmallDeconvolutionShapes()
         : ShapeDataset("InputShape",
     {
-        TensorShape{ 2U, 3U, 3U, 2U },
+        TensorShape{ 4U, 3U, 3U, 2U },
                      TensorShape{ 5U, 5U, 3U },
                      TensorShape{ 11U, 13U, 4U, 3U }
     })
@@ -337,6 +337,35 @@
     }
 };
 
+/** Data set containing small 2D tensor shapes intended as image sizes (640x480 up to 1200x800). */
+class SmallImageShapes final : public ShapeDataset
+{
+public:
+    SmallImageShapes()
+        : ShapeDataset("Shape",
+    {
+        TensorShape{ 640U, 480U },
+                     TensorShape{ 800U, 600U },
+                     TensorShape{ 1200U, 800U }
+    })
+    {
+    }
+};
+
+/** Data set containing large 2D tensor shapes intended as image sizes (1920x1080 up to 3584x2048). */
+class LargeImageShapes final : public ShapeDataset
+{
+public:
+    LargeImageShapes()
+        : ShapeDataset("Shape",
+    {
+        TensorShape{ 1920U, 1080U },
+                     TensorShape{ 2560U, 1536U },
+                     TensorShape{ 3584U, 2048U }
+    })
+    {
+    }
+};
 } // namespace datasets
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1ConvolutionLayerDataset.h b/tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1ConvolutionLayerDataset.h
index b494bf4..191452c 100644
--- a/tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1ConvolutionLayerDataset.h
+++ b/tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1ConvolutionLayerDataset.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
diff --git a/tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4NormalizePlanarYUVLayerDataset.h b/tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4NormalizePlanarYUVLayerDataset.h
new file mode 100644
index 0000000..e0da484
--- /dev/null
+++ b/tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4NormalizePlanarYUVLayerDataset.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_GOOGLENETINCEPTIONV4_NORMALIZEPLANARYUV_LAYER_DATASET
+#define ARM_COMPUTE_TEST_GOOGLENETINCEPTIONV4_NORMALIZEPLANARYUV_LAYER_DATASET
+
+#include "tests/datasets/NormalizePlanarYUVLayerDataset.h"
+
+#include "utils/TypePrinter.h"
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace datasets
+{
+class GoogLeNetInceptionV4NormalizePlanarYUVLayerDataset final : public NormalizePlanarYUVLayerDataset
+{
+public:
+    GoogLeNetInceptionV4NormalizePlanarYUVLayerDataset() // each entry: a 3D tensor shape plus a 1D parameter shape sized to the tensor's third dimension
+    {
+        // conv1_3x3_s2_bn
+        add_config(TensorShape(149U, 149U, 32U), TensorShape(32U));
+        // conv2_3x3_s1_bn
+        add_config(TensorShape(147U, 147U, 32U), TensorShape(32U));
+        // conv3_3x3_s1_bn
+        add_config(TensorShape(147U, 147U, 64U), TensorShape(64U));
+        // inception_stem1_3x3_s2_bn
+        add_config(TensorShape(73U, 73U, 96U), TensorShape(96U));
+        // inception_stem2_3x3_reduce_bn, inception_stem2_1x7_reduce_bn, inception_stem2_1x7_bn, inception_stem2_7x1_bn
+        add_config(TensorShape(73U, 73U, 64U), TensorShape(64U));
+        // inception_stem2_3x3_bn, inception_stem2_3x3_2_bn
+        add_config(TensorShape(71U, 71U, 96U), TensorShape(96U));
+        // inception_stem3_3x3_s2_bn, reduction_a_3x3_2_reduce_bn
+        add_config(TensorShape(35U, 35U, 192U), TensorShape(192U));
+        // inception_a1_1x1_2_bn, inception_a1_3x3_bn, inception_a1_3x3_2_bn, inception_a1_3x3_3_bn, inception_a1_1x1_bn, inception_a2_1x1_2_bn, inception_a2_3x3_bn, inception_a2_3x3_2_bn, inception_a2_3x3_3_bn, inception_a2_1x1_bn, inception_a3_1x1_2_bn, inception_a3_3x3_bn, inception_a3_3x3_2_bn, inception_a3_3x3_3_bn, inception_a3_1x1_bn, inception_a4_1x1_2_bn, inception_a4_3x3_bn, inception_a4_3x3_2_bn, inception_a4_3x3_3_bn, inception_a4_1x1_bn
+        add_config(TensorShape(35U, 35U, 96U), TensorShape(96U));
+        // inception_a1_3x3_reduce_bn, inception_a1_3x3_2_reduce_bn, inception_a2_3x3_reduce_bn, inception_a2_3x3_2_reduce_bn, inception_a3_3x3_reduce_bn, inception_a3_3x3_2_reduce_bn, inception_a4_3x3_reduce_bn, inception_a4_3x3_2_reduce_bn
+        add_config(TensorShape(35U, 35U, 64U), TensorShape(64U));
+        // reduction_a_3x3_bn, inception_b1_1x1_2_bn, inception_b2_1x1_2_bn, inception_b3_1x1_2_bn, inception_b4_1x1_2_bn, inception_b5_1x1_2_bn, inception_b6_1x1_2_bn, inception_b7_1x1_2_bn
+        add_config(TensorShape(17U, 17U, 384U), TensorShape(384U));
+        // reduction_a_3x3_2_bn
+        add_config(TensorShape(35U, 35U, 224U), TensorShape(224U));
+        // reduction_a_3x3_3_bn, inception_b1_7x1_bn, inception_b1_1x7_3_bn, inception_b2_7x1_bn, inception_b2_1x7_3_bn, inception_b3_7x1_bn, inception_b3_1x7_3_bn, inception_b4_7x1_bn, inception_b4_1x7_3_bn, inception_b5_7x1_bn, inception_b5_1x7_3_bn, inception_b6_7x1_bn, inception_b6_1x7_3_bn, inception_b7_7x1_bn, inception_b7_1x7_3_bn, reduction_b_1x7_reduce_bn, reduction_b_1x7_bn
+        add_config(TensorShape(17U, 17U, 256U), TensorShape(256U));
+        // inception_b1_1x7_reduce_bn, inception_b1_7x1_2_reduce_bn, inception_b1_7x1_2_bn, inception_b2_1x7_reduce_bn, inception_b2_7x1_2_reduce_bn, inception_b2_7x1_2_bn, inception_b3_1x7_reduce_bn, inception_b3_7x1_2_reduce_bn, inception_b3_7x1_2_bn, inception_b4_1x7_reduce_bn, inception_b4_7x1_2_reduce_bn, inception_b4_7x1_2_bn, inception_b5_1x7_reduce_bn, inception_b5_7x1_2_reduce_bn, inception_b5_7x1_2_bn, inception_b6_1x7_reduce_bn, inception_b6_7x1_2_reduce_bn, inception_b6_7x1_2_bn, inception_b7_1x7_reduce_bn, inception_b7_7x1_2_reduce_bn, inception_b7_7x1_2_bn, reduction_b_3x3_reduce_bn
+        add_config(TensorShape(17U, 17U, 192U), TensorShape(192U));
+        // inception_b1_1x7_bn, inception_b1_1x7_2_bn, inception_b1_7x1_3_bn, inception_b2_1x7_bn, inception_b2_1x7_2_bn, inception_b2_7x1_3_bn, inception_b3_1x7_bn, inception_b3_1x7_2_bn, inception_b3_7x1_3_bn, inception_b4_1x7_bn, inception_b4_1x7_2_bn, inception_b4_7x1_3_bn, inception_b5_1x7_bn, inception_b5_1x7_2_bn, inception_b5_7x1_3_bn, inception_b6_1x7_bn, inception_b6_1x7_2_bn, inception_b6_7x1_3_bn, inception_b7_1x7_bn, inception_b7_1x7_2_bn, inception_b7_7x1_3_bn
+        add_config(TensorShape(17U, 17U, 224U), TensorShape(224U));
+        // inception_b1_1x1_bn, inception_b2_1x1_bn, inception_b3_1x1_bn, inception_b4_1x1_bn, inception_b5_1x1_bn, inception_b6_1x1_bn, inception_b7_1x1_bn
+        add_config(TensorShape(17U, 17U, 128U), TensorShape(128U));
+        // reduction_b_3x3_bn
+        add_config(TensorShape(8U, 8U, 192U), TensorShape(192U));
+        // reduction_b_7x1_bn
+        add_config(TensorShape(17U, 17U, 320U), TensorShape(320U));
+        // reduction_b_3x3_2_bn
+        add_config(TensorShape(8U, 8U, 320U), TensorShape(320U));
+        // inception_c1_1x1_2_bn, inception_c1_1x3_bn, inception_c1_3x1_bn, inception_c1_1x3_3_bn, inception_c1_3x1_3_bn, inception_c1_1x1_bn, inception_c2_1x1_2_bn, inception_c2_1x3_bn, inception_c2_3x1_bn, inception_c2_1x3_3_bn, inception_c2_3x1_3_bn, inception_c2_1x1_bn, inception_c3_1x1_2_bn, inception_c3_1x3_bn, inception_c3_3x1_bn, inception_c3_1x3_3_bn, inception_c3_3x1_3_bn, inception_c3_1x1_bn
+        add_config(TensorShape(8U, 8U, 256U), TensorShape(256U));
+        // inception_c1_1x1_3_bn, inception_c1_1x1_4_bn, inception_c2_1x1_3_bn, inception_c2_1x1_4_bn, inception_c3_1x1_3_bn, inception_c3_1x1_4_bn
+        add_config(TensorShape(8U, 8U, 384U), TensorShape(384U));
+        // inception_c1_3x1_2_bn, inception_c2_3x1_2_bn, inception_c3_3x1_2_bn
+        add_config(TensorShape(8U, 8U, 448U), TensorShape(448U));
+        // inception_c1_1x3_2_bn, inception_c2_1x3_2_bn, inception_c3_1x3_2_bn
+        add_config(TensorShape(8U, 8U, 512U), TensorShape(512U));
+    }
+};
+} // namespace datasets
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_GOOGLENETINCEPTIONV4_NORMALIZEPLANARYUV_LAYER_DATASET */
diff --git a/tests/datasets/system_tests/mobilenet/MobileNetActivationLayerDataset.h b/tests/datasets/system_tests/mobilenet/MobileNetActivationLayerDataset.h
new file mode 100644
index 0000000..64cd918
--- /dev/null
+++ b/tests/datasets/system_tests/mobilenet/MobileNetActivationLayerDataset.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_MOBILENET_ACTIVATION_LAYER_DATASET
+#define ARM_COMPUTE_TEST_MOBILENET_ACTIVATION_LAYER_DATASET
+
+#include "tests/framework/datasets/Datasets.h"
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace datasets
+{
+class MobileNetActivationLayerDataset final : public // Cartesian product of ten tensor shapes ("Shape") and one activation description ("Info").
+    framework::dataset::CartesianProductDataset<framework::dataset::InitializerListDataset<TensorShape>, framework::dataset::SingletonDataset<ActivationLayerInfo>>
+{
+public:
+    MobileNetActivationLayerDataset() // Builds both sub-datasets and passes them to the CartesianProductDataset base.
+        : CartesianProductDataset
+    {
+        framework::dataset::make("Shape", { // first factor of the product: the list of tensor shapes
+            TensorShape(112U, 112U, 32U), TensorShape(112U, 112U, 64U), TensorShape(56U, 56U, 64U), TensorShape(56U, 56U, 128U),
+            TensorShape(28U, 28U, 128U), TensorShape(28U, 28U, 256U), TensorShape(14U, 14U, 256U), TensorShape(14U, 14U, 512U),
+            TensorShape(7U, 7U, 512U), TensorShape(7U, 7U, 1024U) }),
+        framework::dataset::make("Info", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)) // NOTE(review): 6.f is presumably the upper bound of the bounded ReLU - confirm against ActivationLayerInfo
+    }
+    {
+    }
+    MobileNetActivationLayerDataset(MobileNetActivationLayerDataset &&) = default; // move construction is allowed
+    ~MobileNetActivationLayerDataset()                                  = default;
+};
+} // namespace datasets
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_MOBILENET_ACTIVATION_LAYER_DATASET */
diff --git a/tests/datasets/MobileNetDepthwiseConvolutionLayerDataset.h b/tests/datasets/system_tests/mobilenet/MobileNetDepthwiseConvolutionLayerDataset.h
similarity index 71%
rename from tests/datasets/MobileNetDepthwiseConvolutionLayerDataset.h
rename to tests/datasets/system_tests/mobilenet/MobileNetDepthwiseConvolutionLayerDataset.h
index 5531a08..bd44600 100644
--- a/tests/datasets/MobileNetDepthwiseConvolutionLayerDataset.h
+++ b/tests/datasets/system_tests/mobilenet/MobileNetDepthwiseConvolutionLayerDataset.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,14 +42,14 @@
 public:
     MobileNetDepthwiseConvolutionLayerDataset()
     {
-        add_config(TensorShape(7U, 7U, 1024U), TensorShape(3U, 3U, 1024U), TensorShape(1024U), TensorShape(3U, 3U, 1024U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR));
-        add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U), TensorShape(512U), TensorShape(7U, 7U, 512U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR));
-        add_config(TensorShape(28U, 28U, 256U), TensorShape(3U, 3U, 256U), TensorShape(256U), TensorShape(14U, 14U, 256U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR));
-        add_config(TensorShape(28U, 28U, 256U), TensorShape(3U, 3U, 256U), TensorShape(256U), TensorShape(28U, 28U, 256U), PadStrideInfo(1, 1, 1, 1));
-        add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U), TensorShape(128U), TensorShape(28U, 28U, 128U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR));
-        add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U), TensorShape(128U), TensorShape(56U, 56U, 128U), PadStrideInfo(1, 1, 1, 1));
-        add_config(TensorShape(112U, 112U, 64U), TensorShape(3U, 3U, 64U), TensorShape(64U), TensorShape(56U, 56U, 64U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR));
-        add_config(TensorShape(112U, 112U, 32U), TensorShape(3U, 3U, 32U), TensorShape(32U), TensorShape(112U, 112U, 32U), PadStrideInfo(1, 1, 1, 1));
+        add_config(TensorShape(7U, 7U, 1024U), TensorShape(3U, 3U, 1024U), TensorShape(3U, 3U, 1024U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR));
+        add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U), TensorShape(7U, 7U, 512U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR));
+        add_config(TensorShape(28U, 28U, 256U), TensorShape(3U, 3U, 256U), TensorShape(14U, 14U, 256U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR));
+        add_config(TensorShape(28U, 28U, 256U), TensorShape(3U, 3U, 256U), TensorShape(28U, 28U, 256U), PadStrideInfo(1, 1, 1, 1));
+        add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U), TensorShape(28U, 28U, 128U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR));
+        add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U), TensorShape(56U, 56U, 128U), PadStrideInfo(1, 1, 1, 1));
+        add_config(TensorShape(112U, 112U, 64U), TensorShape(3U, 3U, 64U), TensorShape(56U, 56U, 64U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR));
+        add_config(TensorShape(112U, 112U, 32U), TensorShape(3U, 3U, 32U), TensorShape(112U, 112U, 32U), PadStrideInfo(1, 1, 1, 1));
     }
 };
 } // namespace datasets
diff --git a/tests/datasets/MobileNetDepthwiseSeparableConvolutionLayerDataset.h b/tests/datasets/system_tests/mobilenet/MobileNetDepthwiseSeparableConvolutionLayerDataset.h
similarity index 97%
rename from tests/datasets/MobileNetDepthwiseSeparableConvolutionLayerDataset.h
rename to tests/datasets/system_tests/mobilenet/MobileNetDepthwiseSeparableConvolutionLayerDataset.h
index eef0000..1b4be25 100644
--- a/tests/datasets/MobileNetDepthwiseSeparableConvolutionLayerDataset.h
+++ b/tests/datasets/system_tests/mobilenet/MobileNetDepthwiseSeparableConvolutionLayerDataset.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
diff --git a/tests/framework/Framework.cpp b/tests/framework/Framework.cpp
index d1fb28d..79a77d9 100644
--- a/tests/framework/Framework.cpp
+++ b/tests/framework/Framework.cpp
@@ -25,16 +25,6 @@
 
 #include "support/ToolchainSupport.h"
 
-#ifdef ARM_COMPUTE_CL
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#endif /* ARM_COMPUTE_CL */
-
-#ifdef ARM_COMPUTE_GC
-#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
-#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
-#endif /* ARM_COMPUTE_GC */
-
 #include <chrono>
 #include <iostream>
 #include <sstream>
@@ -310,18 +300,7 @@
                     profiler.start();
                 }
                 test_case->do_run();
-#ifdef ARM_COMPUTE_CL
-                if(opencl_is_available())
-                {
-                    CLScheduler::get().sync();
-                }
-#endif /* ARM_COMPUTE_CL */
-#ifdef ARM_COMPUTE_GC
-                if(opengles31_is_available())
-                {
-                    GCScheduler::get().sync();
-                }
-#endif /* ARM_COMPUTE_GC */
+                test_case->do_sync();
                 if(_num_iterations == 1 || i != 0)
                 {
                     profiler.stop();
diff --git a/tests/framework/Macros.h b/tests/framework/Macros.h
index 7aabb75..deca1ef 100644
--- a/tests/framework/Macros.h
+++ b/tests/framework/Macros.h
@@ -114,6 +114,11 @@
     {                        \
         FIXTURE::run();      \
     }
+#define FIXTURE_SYNC(FIXTURE) /* Emits a do_sync() override that forwards to FIXTURE::sync(). */ \
+    void do_sync() override   \
+    {                         \
+        FIXTURE::sync();      \
+    }
 #define FIXTURE_TEARDOWN(FIXTURE) \
     void do_teardown() override   \
     {                             \
@@ -223,6 +228,7 @@
         TEST_CASE_CONSTRUCTOR(TEST_NAME)                                            \
         FIXTURE_SETUP(FIXTURE)                                                      \
         FIXTURE_RUN(FIXTURE)                                                        \
+        FIXTURE_SYNC(FIXTURE)                                                       \
         FIXTURE_TEARDOWN(FIXTURE)                                                   \
     };                                                                              \
     TEST_REGISTRAR(TEST_NAME, MODE, STATUS)
@@ -244,6 +250,7 @@
         DATA_TEST_CASE_CONSTRUCTOR(TEST_NAME, DATASET)                                                                              \
         FIXTURE_DATA_SETUP(FIXTURE)                                                                                                 \
         FIXTURE_RUN(FIXTURE)                                                                                                        \
+        FIXTURE_SYNC(FIXTURE)                                                                                                       \
         FIXTURE_TEARDOWN(FIXTURE)                                                                                                   \
     };                                                                                                                              \
     DATA_TEST_REGISTRAR(TEST_NAME, MODE, STATUS, DATASET)
diff --git a/tests/framework/TestCase.h b/tests/framework/TestCase.h
index dbb9312..18dd12e 100644
--- a/tests/framework/TestCase.h
+++ b/tests/framework/TestCase.h
@@ -42,6 +42,7 @@
 public:
     virtual void do_setup() {};
     virtual void do_run() {};
+    virtual void do_sync() {}; /**< Called by the framework right after do_run(), before the profiler is stopped; does nothing unless overridden. */
     virtual void do_teardown() {};
 
     /** Default destructor. */
diff --git a/tests/framework/command_line/CommonOptions.cpp b/tests/framework/command_line/CommonOptions.cpp
new file mode 100644
index 0000000..631981b
--- /dev/null
+++ b/tests/framework/command_line/CommonOptions.cpp
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "CommonOptions.h"
+
+#include "../Framework.h"
+#include "../printers/Printers.h"
+#include "CommandLineParser.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace framework
+{
+CommonOptions::CommonOptions(CommandLineParser &parser) // Registers all common options on parser and attaches their help texts.
+    : help(parser.add_option<ToggleOption>("help")),
+      instruments(), // assigned in the body, after the set of allowed values has been built
+      iterations(parser.add_option<SimpleOption<int>>("iterations", 1)),
+      threads(parser.add_option<SimpleOption<int>>("threads", 1)),
+      log_format(), // assigned in the body
+      log_file(parser.add_option<SimpleOption<std::string>>("log-file")),
+      log_level(), // assigned in the body
+      throw_errors(parser.add_option<ToggleOption>("throw-errors")),
+      color_output(parser.add_option<ToggleOption>("color-output", true)),
+      pretty_console(parser.add_option<ToggleOption>("pretty-console", false)),
+      json_file(parser.add_option<SimpleOption<std::string>>("json-file")),
+      pretty_file(parser.add_option<SimpleOption<std::string>>("pretty-file")),
+      log_streams() // starts empty; create_printers() appends the file streams it opens
+{
+    Framework                       &framework = Framework::get();
+    std::set<InstrumentsDescription> allowed_instruments // ALL and NONE are always accepted
+    {
+        std::pair<InstrumentType, ScaleFactor>(InstrumentType::ALL, ScaleFactor::NONE),
+        std::pair<InstrumentType, ScaleFactor>(InstrumentType::NONE, ScaleFactor::NONE),
+    };
+    // Extend the accepted set with every instrument the framework reports as available.
+    for(const auto &type : framework.available_instruments())
+    {
+        allowed_instruments.insert(type);
+    }
+    // Values accepted by the "log-format" option.
+    std::set<LogFormat> supported_log_formats
+    {
+        LogFormat::NONE,
+        LogFormat::PRETTY,
+        LogFormat::JSON,
+    };
+    // Values accepted by the "log-level" option.
+    std::set<LogLevel> supported_log_levels
+    {
+        LogLevel::NONE,
+        LogLevel::CONFIG,
+        LogLevel::TESTS,
+        LogLevel::ERRORS,
+        LogLevel::DEBUG,
+        LogLevel::MEASUREMENTS,
+        LogLevel::ALL,
+    };
+    // NOTE(review): the last argument of each add_option call below looks like the option's default value - confirm against the option classes.
+    instruments = parser.add_option<EnumListOption<InstrumentsDescription>>("instruments", allowed_instruments, std::initializer_list<InstrumentsDescription> { std::pair<InstrumentType, ScaleFactor>(InstrumentType::WALL_CLOCK_TIMER, ScaleFactor::NONE) });
+    log_format  = parser.add_option<EnumOption<LogFormat>>("log-format", supported_log_formats, LogFormat::PRETTY);
+    log_level   = parser.add_option<EnumOption<LogLevel>>("log-level", supported_log_levels, LogLevel::ALL);
+    // Help text of every registered option.
+    help->set_help("Show this help message");
+    instruments->set_help("Set the profiling instruments to use");
+    iterations->set_help("Number of iterations per test case");
+    threads->set_help("Number of threads to use");
+    log_format->set_help("Output format for measurements and failures (affects only log-file)");
+    log_file->set_help("Write output to file instead of to the console (affected by log-format)");
+    log_level->set_help("Verbosity of the output");
+    throw_errors->set_help("Don't catch fatal errors (useful for debugging)");
+    color_output->set_help("Produce colored output on the console");
+    pretty_console->set_help("Produce pretty output on the console");
+    json_file->set_help("Write output to a json file.");
+    pretty_file->set_help("Write output to a text file");
+}
+std::vector<std::unique_ptr<Printer>> CommonOptions::create_printers()
+{
+    // Opens path for writing; the stream is stored in log_streams so it stays alive with this object.
+    const auto open_stream = [this](const std::string &path) -> std::ofstream &
+    {
+        log_streams.push_back(std::make_shared<std::ofstream>(path));
+        return *log_streams.back();
+    };
+    std::vector<std::unique_ptr<Printer>> result;
+    // Extra console printer: only when the main printer is redirected to a file or is not the pretty one.
+    const bool extra_console = pretty_console->value() && (log_file->is_set() || log_format->value() != LogFormat::PRETTY);
+    if(extra_console)
+    {
+        auto console = support::cpp14::make_unique<PrettyPrinter>();
+        console->set_color_output(color_output->value());
+        result.push_back(std::move(console));
+    }
+
+    // Main printer chosen by the log format: JSON, none, or pretty for every other value.
+    std::unique_ptr<Printer> main_printer;
+    const auto               format = log_format->value();
+    if(format == LogFormat::JSON)
+    {
+        main_printer = support::cpp14::make_unique<JSONPrinter>();
+    }
+    else if(format != LogFormat::NONE)
+    {
+        auto pretty = support::cpp14::make_unique<PrettyPrinter>();
+        // Colours are switched off whenever the output goes to a log file.
+        pretty->set_color_output((!log_file->is_set()) && color_output->value());
+        main_printer = std::move(pretty);
+    }
+
+    if(log_file->is_set())
+    {
+        // The log file is opened even if there is no main printer (log format NONE).
+        std::ofstream &log = open_stream(log_file->value());
+        if(main_printer)
+        {
+            main_printer->set_stream(log);
+        }
+    }
+    if(main_printer)
+    {
+        result.push_back(std::move(main_printer));
+    }
+    // Additional outputs that are written regardless of the main printer.
+    if(json_file->is_set())
+    {
+        result.push_back(support::cpp14::make_unique<JSONPrinter>());
+        result.back()->set_stream(open_stream(json_file->value()));
+    }
+    if(pretty_file->is_set())
+    {
+        result.push_back(support::cpp14::make_unique<PrettyPrinter>());
+        result.back()->set_stream(open_stream(pretty_file->value()));
+    }
+    return result;
+}
+} // namespace framework
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/framework/command_line/CommonOptions.h b/tests/framework/command_line/CommonOptions.h
new file mode 100644
index 0000000..2da2c99
--- /dev/null
+++ b/tests/framework/command_line/CommonOptions.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_COMMONOPTIONS
+#define ARM_COMPUTE_TEST_COMMONOPTIONS
+
+#include "../instruments/Instruments.h"
+#include "CommandLineOptions.h"
+#include <memory>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace framework
+{
+class CommandLineParser;
+class Printer;
+enum class LogFormat;
+enum class LogLevel;
+
+/** Common command line options used to configure the framework
+ *
+ * Every option member points to an option that the constructor registered on the parser.
+ * The options get populated when "parse()" is called on that parser. The expected workflow is:
+ *
+ * CommandLineParser parser;
+ * CommonOptions options( parser );
+ * parser.parse(argc, argv);
+ * if(options.log_level->value() > LogLevel::NONE) --> Use the options values
+ */
+class CommonOptions
+{
+public:
+    /** Constructor (explicit, so that a parser is never converted into a set of options implicitly)
+     *
+     * @param[in,out] parser A parser on which "parse()" hasn't been called yet.
+     */
+    explicit CommonOptions(CommandLineParser &parser);
+    CommonOptions(const CommonOptions &) = delete;            /**< Not copy constructible */
+    CommonOptions &operator=(const CommonOptions &) = delete; /**< Not copy assignable */
+    /** Create the printers based on parsed command line options
+     *
+     * @pre "parse()" has been called on the parser used to construct this object
+     *
+     * @return List of printers
+     */
+    std::vector<std::unique_ptr<Printer>> create_printers();
+
+    ToggleOption                               *help;           /**< "help": show the help message */
+    EnumListOption<InstrumentsDescription>     *instruments;    /**< "instruments": profiling instruments to use */
+    SimpleOption<int>                          *iterations;     /**< "iterations": number of iterations per test case */
+    SimpleOption<int>                          *threads;        /**< "threads": number of threads to use */
+    EnumOption<LogFormat>                      *log_format;     /**< "log-format": output format for measurements and failures */
+    SimpleOption<std::string>                  *log_file;       /**< "log-file": write output to this file instead of the console */
+    EnumOption<LogLevel>                       *log_level;      /**< "log-level": verbosity of the output */
+    ToggleOption                               *throw_errors;   /**< "throw-errors": don't catch fatal errors */
+    ToggleOption                               *color_output;   /**< "color-output": produce colored output on the console */
+    ToggleOption                               *pretty_console; /**< "pretty-console": produce pretty output on the console */
+    SimpleOption<std::string>                  *json_file;      /**< "json-file": additionally write output to a JSON file */
+    SimpleOption<std::string>                  *pretty_file;    /**< "pretty-file": additionally write output to a text file */
+    std::vector<std::shared_ptr<std::ofstream>> log_streams;    /**< File streams opened by create_printers(), kept alive by this object */
+};
+
+} // namespace framework
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_COMMONOPTIONS */
diff --git a/tests/framework/datasets/JoinDataset.h b/tests/framework/datasets/JoinDataset.h
index eded6e0..d682c19 100644
--- a/tests/framework/datasets/JoinDataset.h
+++ b/tests/framework/datasets/JoinDataset.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,8 +47,10 @@
 class JoinDataset : public Dataset
 {
 private:
-    using iter1_type = typename T::iterator;
-    using iter2_type = typename U::iterator;
+    using T_noref    = typename std::remove_reference<T>::type;
+    using U_noref    = typename std::remove_reference<U>::type;
+    using iter1_type = typename T_noref::iterator;
+    using iter2_type = typename U_noref::iterator;
 
 public:
     /** Construct dataset from the given datasets.
@@ -65,12 +67,12 @@
     JoinDataset(JoinDataset &&) = default;
 
     /** Type of the dataset. */
-    using type = typename T::type;
+    using type = typename T_noref::type;
 
     /** Iterator for the dataset. */
     struct iterator
     {
-        iterator(const T *dataset1, const U *dataset2)
+        iterator(const T_noref *dataset1, const U_noref *dataset2)
             : _iter1{ dataset1->begin() }, _iter2{ dataset2->begin() }, _first_size{ dataset1->size() }
         {
         }
diff --git a/tests/framework/instruments/hwc.hpp b/tests/framework/instruments/hwc.hpp
index 8411576..3607ef5 100644
--- a/tests/framework/instruments/hwc.hpp
+++ b/tests/framework/instruments/hwc.hpp
@@ -39,20 +39,20 @@
 
 #if defined(ANDROID) || defined(__ANDROID__)
 /* We use _IOR_BAD/_IOW_BAD rather than _IOR/_IOW otherwise fails to compile with NDK-BUILD because of _IOC_TYPECHECK is defined, not because the paramter is invalid */
-#define MALI_IOR(a,b,c)  _IOR_BAD(a, b, c)
-#define MALI_IOW(a,b,c)  _IOW_BAD(a, b, c)
+#define MALI_IOR(a, b, c) _IOR_BAD(a, b, c)
+#define MALI_IOW(a, b, c) _IOW_BAD(a, b, c)
 #else /* defined(ANDROID) || defined(__ANDROID__) */
-#define MALI_IOR(a,b,c)  _IOR(a, b, c)
-#define MALI_IOW(a,b,c)  _IOW(a, b, c)
+#define MALI_IOR(a, b, c) _IOR(a, b, c)
+#define MALI_IOW(a, b, c) _IOW(a, b, c)
 #endif /* defined(ANDROID) || defined(__ANDROID__) */
 
 namespace mali_userspace
 {
 union uk_header
 {
-	uint32_t id;
-	uint32_t ret;
-	uint64_t sizer;
+    uint32_t id;
+    uint32_t ret;
+    uint64_t sizer;
 };
 
 #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
@@ -60,194 +60,201 @@
 
 struct mali_base_gpu_core_props
 {
-	uint32_t product_id;
-	uint16_t version_status;
-	uint16_t minor_revision;
-	uint16_t major_revision;
-	uint16_t padding;
-	uint32_t gpu_speed_mhz;
-	uint32_t gpu_freq_khz_max;
-	uint32_t gpu_freq_khz_min;
-	uint32_t log2_program_counter_size;
-	uint32_t texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
-	uint64_t gpu_available_memory_size;
+    uint32_t product_id;
+    uint16_t version_status;
+    uint16_t minor_revision;
+    uint16_t major_revision;
+    uint16_t padding;
+    uint32_t gpu_speed_mhz;
+    uint32_t gpu_freq_khz_max;
+    uint32_t gpu_freq_khz_min;
+    uint32_t log2_program_counter_size;
+    uint32_t texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+    uint64_t gpu_available_memory_size;
 };
 
 struct mali_base_gpu_l2_cache_props
 {
-	uint8_t log2_line_size;
-	uint8_t log2_cache_size;
-	uint8_t num_l2_slices;
-	uint8_t padding[5];
+    uint8_t log2_line_size;
+    uint8_t log2_cache_size;
+    uint8_t num_l2_slices;
+    uint8_t padding[5];
 };
 
 struct mali_base_gpu_tiler_props
 {
-	uint32_t bin_size_bytes;
-	uint32_t max_active_levels;
+    uint32_t bin_size_bytes;
+    uint32_t max_active_levels;
 };
 
 struct mali_base_gpu_thread_props
 {
-	uint32_t max_threads;
-	uint32_t max_workgroup_size;
-	uint32_t max_barrier_size;
-	uint16_t max_registers;
-	uint8_t max_task_queue;
-	uint8_t max_thread_group_split;
-	uint8_t impl_tech;
-	uint8_t padding[7];
+    uint32_t max_threads;
+    uint32_t max_workgroup_size;
+    uint32_t max_barrier_size;
+    uint16_t max_registers;
+    uint8_t  max_task_queue;
+    uint8_t  max_thread_group_split;
+    uint8_t  impl_tech;
+    uint8_t  padding[7];
 };
 
 struct mali_base_gpu_coherent_group
 {
-	uint64_t core_mask;
-	uint16_t num_cores;
-	uint16_t padding[3];
+    uint64_t core_mask;
+    uint16_t num_cores;
+    uint16_t padding[3];
 };
 
 struct mali_base_gpu_coherent_group_info
 {
-	uint32_t num_groups;
-	uint32_t num_core_groups;
-	uint32_t coherency;
-	uint32_t padding;
-	mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
+    uint32_t                     num_groups;
+    uint32_t                     num_core_groups;
+    uint32_t                     coherency;
+    uint32_t                     padding;
+    mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
 };
 
 #define GPU_MAX_JOB_SLOTS 16
 struct gpu_raw_gpu_props
 {
-	uint64_t shader_present;
-	uint64_t tiler_present;
-	uint64_t l2_present;
-	uint64_t unused_1;
+    uint64_t shader_present;
+    uint64_t tiler_present;
+    uint64_t l2_present;
+    uint64_t unused_1;
 
-	uint32_t l2_features;
-	uint32_t suspend_size;
-	uint32_t mem_features;
-	uint32_t mmu_features;
+    uint32_t l2_features;
+    uint32_t suspend_size;
+    uint32_t mem_features;
+    uint32_t mmu_features;
 
-	uint32_t as_present;
+    uint32_t as_present;
 
-	uint32_t js_present;
-	uint32_t js_features[GPU_MAX_JOB_SLOTS];
-	uint32_t tiler_features;
-	uint32_t texture_features[3];
+    uint32_t js_present;
+    uint32_t js_features[GPU_MAX_JOB_SLOTS];
+    uint32_t tiler_features;
+    uint32_t texture_features[3];
 
-	uint32_t gpu_id;
+    uint32_t gpu_id;
 
-	uint32_t thread_max_threads;
-	uint32_t thread_max_workgroup_size;
-	uint32_t thread_max_barrier_size;
-	uint32_t thread_features;
+    uint32_t thread_max_threads;
+    uint32_t thread_max_workgroup_size;
+    uint32_t thread_max_barrier_size;
+    uint32_t thread_features;
 
-	uint32_t coherency_mode;
+    uint32_t coherency_mode;
 };
 
 struct mali_base_gpu_props
 {
-	mali_base_gpu_core_props core_props;
-	mali_base_gpu_l2_cache_props l2_props;
-	uint64_t unused;
-	mali_base_gpu_tiler_props tiler_props;
-	mali_base_gpu_thread_props thread_props;
-	gpu_raw_gpu_props raw_props;
-	mali_base_gpu_coherent_group_info coherency_info;
+    mali_base_gpu_core_props          core_props;
+    mali_base_gpu_l2_cache_props      l2_props;
+    uint64_t                          unused;
+    mali_base_gpu_tiler_props         tiler_props;
+    mali_base_gpu_thread_props        thread_props;
+    gpu_raw_gpu_props                 raw_props;
+    mali_base_gpu_coherent_group_info coherency_info;
 };
 
 struct kbase_uk_gpuprops
 {
-	uk_header header;
-	mali_base_gpu_props props;
+    uk_header           header;
+    mali_base_gpu_props props;
 };
 
-#define KBASE_GPUPROP_VALUE_SIZE_U8  (0x0)
+#define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0)
 #define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1)
 #define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2)
 #define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3)
 
-#define KBASE_GPUPROP_PRODUCT_ID                1
-#define KBASE_GPUPROP_MINOR_REVISION			3
-#define KBASE_GPUPROP_MAJOR_REVISION			4
+#define KBASE_GPUPROP_PRODUCT_ID 1
+#define KBASE_GPUPROP_MINOR_REVISION 3
+#define KBASE_GPUPROP_MAJOR_REVISION 4
 
-#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS		61
-#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS		62
-#define KBASE_GPUPROP_COHERENCY_GROUP_0			64
-#define KBASE_GPUPROP_COHERENCY_GROUP_1			65
-#define KBASE_GPUPROP_COHERENCY_GROUP_2			66
-#define KBASE_GPUPROP_COHERENCY_GROUP_3			67
-#define KBASE_GPUPROP_COHERENCY_GROUP_4			68
-#define KBASE_GPUPROP_COHERENCY_GROUP_5			69
-#define KBASE_GPUPROP_COHERENCY_GROUP_6			70
-#define KBASE_GPUPROP_COHERENCY_GROUP_7			71
-#define KBASE_GPUPROP_COHERENCY_GROUP_8			72
-#define KBASE_GPUPROP_COHERENCY_GROUP_9			73
-#define KBASE_GPUPROP_COHERENCY_GROUP_10		74
-#define KBASE_GPUPROP_COHERENCY_GROUP_11		75
-#define KBASE_GPUPROP_COHERENCY_GROUP_12		76
-#define KBASE_GPUPROP_COHERENCY_GROUP_13		77
-#define KBASE_GPUPROP_COHERENCY_GROUP_14		78
-#define KBASE_GPUPROP_COHERENCY_GROUP_15		79
+#define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61
+#define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62
+#define KBASE_GPUPROP_COHERENCY_GROUP_0 64
+#define KBASE_GPUPROP_COHERENCY_GROUP_1 65
+#define KBASE_GPUPROP_COHERENCY_GROUP_2 66
+#define KBASE_GPUPROP_COHERENCY_GROUP_3 67
+#define KBASE_GPUPROP_COHERENCY_GROUP_4 68
+#define KBASE_GPUPROP_COHERENCY_GROUP_5 69
+#define KBASE_GPUPROP_COHERENCY_GROUP_6 70
+#define KBASE_GPUPROP_COHERENCY_GROUP_7 71
+#define KBASE_GPUPROP_COHERENCY_GROUP_8 72
+#define KBASE_GPUPROP_COHERENCY_GROUP_9 73
+#define KBASE_GPUPROP_COHERENCY_GROUP_10 74
+#define KBASE_GPUPROP_COHERENCY_GROUP_11 75
+#define KBASE_GPUPROP_COHERENCY_GROUP_12 76
+#define KBASE_GPUPROP_COHERENCY_GROUP_13 77
+#define KBASE_GPUPROP_COHERENCY_GROUP_14 78
+#define KBASE_GPUPROP_COHERENCY_GROUP_15 79
 
 struct gpu_props
 {
     uint32_t product_id;
     uint16_t minor_revision;
     uint16_t major_revision;
-	uint32_t num_groups;
+    uint32_t num_groups;
     uint32_t num_core_groups;
     uint64_t core_mask[16];
 };
 
-static const struct {
+static const struct
+{
     uint32_t type;
-    size_t offset;
-    int size;
-} gpu_property_mapping[] = {
-#define PROP(name, member) \
-	{KBASE_GPUPROP_ ## name, offsetof(struct gpu_props, member), \
-		sizeof(((struct gpu_props*)0)->member)}
-#define PROP2(name, member, off) \
-	{KBASE_GPUPROP_ ## name, offsetof(struct gpu_props, member) + off, \
-		sizeof(((struct gpu_props*)0)->member)}
-        PROP(PRODUCT_ID,                    product_id),
-        PROP(MINOR_REVISION,                minor_revision),
-        PROP(MAJOR_REVISION,                major_revision),
-        PROP(COHERENCY_NUM_GROUPS,          num_groups),
-        PROP(COHERENCY_NUM_CORE_GROUPS,     num_core_groups),
-        PROP2(COHERENCY_GROUP_0,             core_mask, 0),
-        PROP2(COHERENCY_GROUP_1,             core_mask, 1),
-        PROP2(COHERENCY_GROUP_2,             core_mask, 2),
-        PROP2(COHERENCY_GROUP_3,             core_mask, 3),
-        PROP2(COHERENCY_GROUP_4,             core_mask, 4),
-        PROP2(COHERENCY_GROUP_5,             core_mask, 5),
-        PROP2(COHERENCY_GROUP_6,             core_mask, 6),
-        PROP2(COHERENCY_GROUP_7,             core_mask, 7),
-        PROP2(COHERENCY_GROUP_8,             core_mask, 8),
-        PROP2(COHERENCY_GROUP_9,             core_mask, 9),
-        PROP2(COHERENCY_GROUP_10,            core_mask, 10),
-        PROP2(COHERENCY_GROUP_11,            core_mask, 11),
-        PROP2(COHERENCY_GROUP_12,            core_mask, 12),
-        PROP2(COHERENCY_GROUP_13,            core_mask, 13),
-        PROP2(COHERENCY_GROUP_14,            core_mask, 14),
-        PROP2(COHERENCY_GROUP_15,            core_mask, 15),
+    size_t   offset;
+    int      size;
+} gpu_property_mapping[] =
+{
+#define PROP(name, member) /* Table entry { property id, byte offset, byte size } for a gpu_props member. */ \
+    {                                                             \
+        KBASE_GPUPROP_##name, offsetof(struct gpu_props, member), \
+        sizeof(((struct gpu_props *)0)->member)               \
+    }
+#define PROP2(name, member, off) /* Table entry for element `off` of the array `member`: byte offset and byte size of that one element. */ \
+    {                                                                                                                                       \
+        KBASE_GPUPROP_##name, offsetof(struct gpu_props, member) + (off) * sizeof(((struct gpu_props *)0)->member[0]), /* index scaled to bytes */ \
+        sizeof(((struct gpu_props *)0)->member[0]) /* one element, not the whole array */                                                  \
+    }
+    PROP(PRODUCT_ID, product_id),
+    PROP(MINOR_REVISION, minor_revision),
+    PROP(MAJOR_REVISION, major_revision),
+    PROP(COHERENCY_NUM_GROUPS, num_groups),
+    PROP(COHERENCY_NUM_CORE_GROUPS, num_core_groups),
+    PROP2(COHERENCY_GROUP_0, core_mask, 0),
+    PROP2(COHERENCY_GROUP_1, core_mask, 1),
+    PROP2(COHERENCY_GROUP_2, core_mask, 2),
+    PROP2(COHERENCY_GROUP_3, core_mask, 3),
+    PROP2(COHERENCY_GROUP_4, core_mask, 4),
+    PROP2(COHERENCY_GROUP_5, core_mask, 5),
+    PROP2(COHERENCY_GROUP_6, core_mask, 6),
+    PROP2(COHERENCY_GROUP_7, core_mask, 7),
+    PROP2(COHERENCY_GROUP_8, core_mask, 8),
+    PROP2(COHERENCY_GROUP_9, core_mask, 9),
+    PROP2(COHERENCY_GROUP_10, core_mask, 10),
+    PROP2(COHERENCY_GROUP_11, core_mask, 11),
+    PROP2(COHERENCY_GROUP_12, core_mask, 12),
+    PROP2(COHERENCY_GROUP_13, core_mask, 13),
+    PROP2(COHERENCY_GROUP_14, core_mask, 14),
+    PROP2(COHERENCY_GROUP_15, core_mask, 15),
 #undef PROP
 #undef PROP2
-        {0, 0, 0}
+    { 0, 0, 0 }
 };
 
 struct kbase_hwcnt_reader_metadata
 {
-    uint64_t timestamp = 0;
-    uint32_t event_id = 0;
+    uint64_t timestamp  = 0;
+    uint32_t event_id   = 0;
     uint32_t buffer_idx = 0;
 };
 
 namespace
 {
 /** Message header */
-union kbase_uk_hwcnt_header {
+union kbase_uk_hwcnt_header
+{
     /* 32-bit number identifying the UK function to be called. */
     uint32_t id;
     /* The int return code returned by the called UK function. */
@@ -257,7 +264,8 @@
 };
 
 /** IOCTL parameters to check version */
-struct kbase_uk_hwcnt_reader_version_check_args {
+struct kbase_uk_hwcnt_reader_version_check_args
+{
     union kbase_uk_hwcnt_header header;
 
     uint16_t major;
@@ -265,23 +273,26 @@
     uint8_t  padding[4];
 };
 
-union kbase_pointer {
-	void *value;
-	uint32_t compat_value;
-	uint64_t sizer;
+union kbase_pointer /* One storage slot viewed either as a pointer or as a fixed-width integer. */
+{
+    void    *value;        /* Native pointer view. */
+    uint32_t compat_value; /* 32-bit integer view of the same storage. */
+    uint64_t sizer;        /* 64-bit member, so the union is never smaller than 8 bytes. */
 };
 
-struct kbase_ioctl_get_gpuprops {
-	kbase_pointer buffer;
-	uint32_t size;
-	uint32_t flags;
+struct kbase_ioctl_get_gpuprops /* Argument type named by the KBASE_IOCTL_GET_GPUPROPS request code. */
+{
+    kbase_pointer buffer; /* NOTE(review): presumably where the property data is written - confirm. */
+    uint32_t      size;   /* NOTE(review): presumably the byte size of buffer - confirm. */
+    uint32_t      flags;  /* NOTE(review): meaning not visible in this file - confirm against the kbase driver. */
 };
 
 #define KBASE_IOCTL_TYPE 0x80
 #define KBASE_IOCTL_GET_GPUPROPS MALI_IOW(KBASE_IOCTL_TYPE, 3, struct kbase_ioctl_get_gpuprops)
 
 /** IOCTL parameters to set flags */
-struct kbase_uk_hwcnt_reader_set_flags {
+struct kbase_uk_hwcnt_reader_set_flags
+{
     union kbase_uk_hwcnt_header header;
 
     uint32_t create_flags;
@@ -301,51 +312,51 @@
     uint32_t mmu_l2_bm;
 
     /* OUT */
-    int32_t  fd;
+    int32_t fd;
 };
 
 static const uint32_t HWCNT_READER_API = 1;
 
-
 struct uku_version_check_args
 {
-	uk_header header;
-	uint16_t major;
-	uint16_t minor;
-	uint8_t padding[4];
+    uk_header header;     /* NOTE(review): uk_header is declared outside this hunk - confirm its layout there. */
+    uint16_t  major;      /* NOTE(review): presumably the major version being checked - confirm. */
+    uint16_t  minor;      /* NOTE(review): presumably the minor version being checked - confirm. */
+    uint8_t   padding[4]; /* Four explicit padding bytes after the two 16-bit fields. */
 };
 
-enum {
-	UKP_FUNC_ID_CHECK_VERSION = 0,
+enum
+{
+    UKP_FUNC_ID_CHECK_VERSION = 0,
     /* Related to mali0 ioctl interface */
-            LINUX_UK_BASE_MAGIC                 = 0x80,
-    BASE_CONTEXT_CREATE_KERNEL_FLAGS    = 0x2,
-    KBASE_FUNC_HWCNT_UK_FUNC_ID         = 512,
-	KBASE_FUNC_GPU_PROPS_REG_DUMP       = KBASE_FUNC_HWCNT_UK_FUNC_ID + 14,
-    KBASE_FUNC_HWCNT_READER_SETUP       = KBASE_FUNC_HWCNT_UK_FUNC_ID + 36,
-    KBASE_FUNC_HWCNT_DUMP               = KBASE_FUNC_HWCNT_UK_FUNC_ID + 11,
-    KBASE_FUNC_HWCNT_CLEAR              = KBASE_FUNC_HWCNT_UK_FUNC_ID + 12,
-    KBASE_FUNC_SET_FLAGS                = KBASE_FUNC_HWCNT_UK_FUNC_ID + 18,
+    LINUX_UK_BASE_MAGIC              = 0x80, /* ioctl type that mali_ioctl() passes to _IOC(). */
+    BASE_CONTEXT_CREATE_KERNEL_FLAGS = 0x2,
+    KBASE_FUNC_HWCNT_UK_FUNC_ID      = 512, /* Base value; the KBASE_FUNC_* ids below are offsets from it. */
+    KBASE_FUNC_GPU_PROPS_REG_DUMP    = KBASE_FUNC_HWCNT_UK_FUNC_ID + 14,
+    KBASE_FUNC_HWCNT_READER_SETUP    = KBASE_FUNC_HWCNT_UK_FUNC_ID + 36,
+    KBASE_FUNC_HWCNT_DUMP            = KBASE_FUNC_HWCNT_UK_FUNC_ID + 11,
+    KBASE_FUNC_HWCNT_CLEAR           = KBASE_FUNC_HWCNT_UK_FUNC_ID + 12,
+    KBASE_FUNC_SET_FLAGS             = KBASE_FUNC_HWCNT_UK_FUNC_ID + 18,
 
     /* The ids of ioctl commands for the reader interface */
-            KBASE_HWCNT_READER                  = 0xBE,
-    KBASE_HWCNT_READER_GET_HWVER        = MALI_IOR(KBASE_HWCNT_READER, 0x00, uint32_t),
-    KBASE_HWCNT_READER_GET_BUFFER_SIZE  = MALI_IOR(KBASE_HWCNT_READER, 0x01, uint32_t),
-    KBASE_HWCNT_READER_DUMP             = MALI_IOW(KBASE_HWCNT_READER, 0x10, uint32_t),
-    KBASE_HWCNT_READER_CLEAR            = MALI_IOW(KBASE_HWCNT_READER, 0x11, uint32_t),
-    KBASE_HWCNT_READER_GET_BUFFER       = MALI_IOR(KBASE_HWCNT_READER, 0x20, struct kbase_hwcnt_reader_metadata),
-    KBASE_HWCNT_READER_PUT_BUFFER       = MALI_IOW(KBASE_HWCNT_READER, 0x21, struct kbase_hwcnt_reader_metadata),
-    KBASE_HWCNT_READER_SET_INTERVAL     = MALI_IOW(KBASE_HWCNT_READER, 0x30, uint32_t),
-    KBASE_HWCNT_READER_ENABLE_EVENT     = MALI_IOW(KBASE_HWCNT_READER, 0x40, uint32_t),
-    KBASE_HWCNT_READER_DISABLE_EVENT    = MALI_IOW(KBASE_HWCNT_READER, 0x41, uint32_t),
-    KBASE_HWCNT_READER_GET_API_VERSION  = MALI_IOW(KBASE_HWCNT_READER, 0xFF, uint32_t)
+    KBASE_HWCNT_READER                 = 0xBE, /* ioctl type shared by every KBASE_HWCNT_READER_* command below. */
+    KBASE_HWCNT_READER_GET_HWVER       = MALI_IOR(KBASE_HWCNT_READER, 0x00, uint32_t),
+    KBASE_HWCNT_READER_GET_BUFFER_SIZE = MALI_IOR(KBASE_HWCNT_READER, 0x01, uint32_t),
+    KBASE_HWCNT_READER_DUMP            = MALI_IOW(KBASE_HWCNT_READER, 0x10, uint32_t),
+    KBASE_HWCNT_READER_CLEAR           = MALI_IOW(KBASE_HWCNT_READER, 0x11, uint32_t),
+    KBASE_HWCNT_READER_GET_BUFFER      = MALI_IOR(KBASE_HWCNT_READER, 0x20, struct kbase_hwcnt_reader_metadata),
+    KBASE_HWCNT_READER_PUT_BUFFER      = MALI_IOW(KBASE_HWCNT_READER, 0x21, struct kbase_hwcnt_reader_metadata),
+    KBASE_HWCNT_READER_SET_INTERVAL    = MALI_IOW(KBASE_HWCNT_READER, 0x30, uint32_t),
+    KBASE_HWCNT_READER_ENABLE_EVENT    = MALI_IOW(KBASE_HWCNT_READER, 0x40, uint32_t),
+    KBASE_HWCNT_READER_DISABLE_EVENT   = MALI_IOW(KBASE_HWCNT_READER, 0x41, uint32_t),
+    KBASE_HWCNT_READER_GET_API_VERSION = MALI_IOW(KBASE_HWCNT_READER, 0xFF, uint32_t)
 
 };
 
 enum
 {
-    PIPE_DESCRIPTOR_IN,   /**< The index of a pipe's input descriptor. */
-    PIPE_DESCRIPTOR_OUT,  /**< The index of a pipe's output descriptor. */
+    PIPE_DESCRIPTOR_IN,  /**< The index of a pipe's input descriptor. */
+    PIPE_DESCRIPTOR_OUT, /**< The index of a pipe's output descriptor. */
 
     PIPE_DESCRIPTOR_COUNT /**< The number of descriptors forming a pipe. */
 };
@@ -355,22 +366,22 @@
     POLL_DESCRIPTOR_SIGNAL,       /**< The index of the signal descriptor in poll fds array. */
     POLL_DESCRIPTOR_HWCNT_READER, /**< The index of the hwcnt reader descriptor in poll fds array. */
 
-    POLL_DESCRIPTOR_COUNT         /**< The number of descriptors poll is waiting for. */
+    POLL_DESCRIPTOR_COUNT /**< The number of descriptors poll is waiting for. */
 };
 
 /** Write a single byte into the pipe to interrupt the reader thread */
 typedef char poll_data_t;
 }
 
-template<typename T>
+template <typename T>
 static inline int mali_ioctl(int fd, T &arg)
 {
-    auto *hdr = &arg.header;
+    auto     *hdr = &arg.header;
     const int cmd = _IOC(_IOC_READ | _IOC_WRITE, LINUX_UK_BASE_MAGIC, hdr->id, sizeof(T));
 
-    if (ioctl(fd, cmd, &arg))
+    if(ioctl(fd, cmd, &arg))
         return -1;
-    if (hdr->ret)
+    if(hdr->ret)
         return -1;
 
     return 0;
diff --git a/tests/framework/instruments/hwc_names.hpp b/tests/framework/instruments/hwc_names.hpp
index 181af7f..ffc19b5 100644
--- a/tests/framework/instruments/hwc_names.hpp
+++ b/tests/framework/instruments/hwc_names.hpp
@@ -26,16 +26,20 @@
 
 namespace mali_userspace
 {
-	enum MaliCounterBlockName {
-		MALI_NAME_BLOCK_JM      = 0,
-		MALI_NAME_BLOCK_TILER   = 1,
-		MALI_NAME_BLOCK_SHADER  = 2,
-		MALI_NAME_BLOCK_MMU     = 3
-	};
+enum MaliCounterBlockName /* Identifies one block section of the counter-name tables in this file. */
+{
+    MALI_NAME_BLOCK_JM     = 0, /* "Job Manager" section (first in each table). */
+    MALI_NAME_BLOCK_TILER  = 1, /* "Tiler" section (second in each table). */
+    MALI_NAME_BLOCK_SHADER = 2, /* "Shader Core" section (third in each table). */
+    MALI_NAME_BLOCK_MMU    = 3  /* "L2 and MMU" section (fourth in each table). */
+};
 
-	enum { MALI_NAME_BLOCK_SIZE = 64 };
+enum
+{
+    MALI_NAME_BLOCK_SIZE = 64 /* Names per block section, e.g. in hardware_counters_mali_t60x and _t62x. */
+};
 
-    /*
+/*
      * "Short names" for hardware counters used by Streamline. Counters names are
      * stored in accordance with their memory layout in the binary counter block
      * emitted by the Mali GPU. Each "master" in the GPU emits a fixed-size block
@@ -46,2972 +50,3010 @@
      * where no counter exists.
      */
 
-    static const char * const hardware_counters_mali_t60x[] = {
-        /* Job Manager */
-        "",
-        "",
-        "",
-        "",
-        "T60x_MESSAGES_SENT",
-        "T60x_MESSAGES_RECEIVED",
-        "T60x_GPU_ACTIVE",
-        "T60x_IRQ_ACTIVE",
-        "T60x_JS0_JOBS",
-        "T60x_JS0_TASKS",
-        "T60x_JS0_ACTIVE",
-        "",
-        "T60x_JS0_WAIT_READ",
-        "T60x_JS0_WAIT_ISSUE",
-        "T60x_JS0_WAIT_DEPEND",
-        "T60x_JS0_WAIT_FINISH",
-        "T60x_JS1_JOBS",
-        "T60x_JS1_TASKS",
-        "T60x_JS1_ACTIVE",
-        "",
-        "T60x_JS1_WAIT_READ",
-        "T60x_JS1_WAIT_ISSUE",
-        "T60x_JS1_WAIT_DEPEND",
-        "T60x_JS1_WAIT_FINISH",
-        "T60x_JS2_JOBS",
-        "T60x_JS2_TASKS",
-        "T60x_JS2_ACTIVE",
-        "",
-        "T60x_JS2_WAIT_READ",
-        "T60x_JS2_WAIT_ISSUE",
-        "T60x_JS2_WAIT_DEPEND",
-        "T60x_JS2_WAIT_FINISH",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
+static const char *const hardware_counters_mali_t60x[] =
+{
+    /* Job Manager */
+    "",
+    "",
+    "",
+    "",
+    "T60x_MESSAGES_SENT",
+    "T60x_MESSAGES_RECEIVED",
+    "T60x_GPU_ACTIVE",
+    "T60x_IRQ_ACTIVE",
+    "T60x_JS0_JOBS",
+    "T60x_JS0_TASKS",
+    "T60x_JS0_ACTIVE",
+    "",
+    "T60x_JS0_WAIT_READ",
+    "T60x_JS0_WAIT_ISSUE",
+    "T60x_JS0_WAIT_DEPEND",
+    "T60x_JS0_WAIT_FINISH",
+    "T60x_JS1_JOBS",
+    "T60x_JS1_TASKS",
+    "T60x_JS1_ACTIVE",
+    "",
+    "T60x_JS1_WAIT_READ",
+    "T60x_JS1_WAIT_ISSUE",
+    "T60x_JS1_WAIT_DEPEND",
+    "T60x_JS1_WAIT_FINISH",
+    "T60x_JS2_JOBS",
+    "T60x_JS2_TASKS",
+    "T60x_JS2_ACTIVE",
+    "",
+    "T60x_JS2_WAIT_READ",
+    "T60x_JS2_WAIT_ISSUE",
+    "T60x_JS2_WAIT_DEPEND",
+    "T60x_JS2_WAIT_FINISH",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
 
-        /*Tiler */
-        "",
-        "",
-        "",
-        "T60x_TI_JOBS_PROCESSED",
-        "T60x_TI_TRIANGLES",
-        "T60x_TI_QUADS",
-        "T60x_TI_POLYGONS",
-        "T60x_TI_POINTS",
-        "T60x_TI_LINES",
-        "T60x_TI_VCACHE_HIT",
-        "T60x_TI_VCACHE_MISS",
-        "T60x_TI_FRONT_FACING",
-        "T60x_TI_BACK_FACING",
-        "T60x_TI_PRIM_VISIBLE",
-        "T60x_TI_PRIM_CULLED",
-        "T60x_TI_PRIM_CLIPPED",
-        "T60x_TI_LEVEL0",
-        "T60x_TI_LEVEL1",
-        "T60x_TI_LEVEL2",
-        "T60x_TI_LEVEL3",
-        "T60x_TI_LEVEL4",
-        "T60x_TI_LEVEL5",
-        "T60x_TI_LEVEL6",
-        "T60x_TI_LEVEL7",
-        "T60x_TI_COMMAND_1",
-        "T60x_TI_COMMAND_2",
-        "T60x_TI_COMMAND_3",
-        "T60x_TI_COMMAND_4",
-        "T60x_TI_COMMAND_4_7",
-        "T60x_TI_COMMAND_8_15",
-        "T60x_TI_COMMAND_16_63",
-        "T60x_TI_COMMAND_64",
-        "T60x_TI_COMPRESS_IN",
-        "T60x_TI_COMPRESS_OUT",
-        "T60x_TI_COMPRESS_FLUSH",
-        "T60x_TI_TIMESTAMPS",
-        "T60x_TI_PCACHE_HIT",
-        "T60x_TI_PCACHE_MISS",
-        "T60x_TI_PCACHE_LINE",
-        "T60x_TI_PCACHE_STALL",
-        "T60x_TI_WRBUF_HIT",
-        "T60x_TI_WRBUF_MISS",
-        "T60x_TI_WRBUF_LINE",
-        "T60x_TI_WRBUF_PARTIAL",
-        "T60x_TI_WRBUF_STALL",
-        "T60x_TI_ACTIVE",
-        "T60x_TI_LOADING_DESC",
-        "T60x_TI_INDEX_WAIT",
-        "T60x_TI_INDEX_RANGE_WAIT",
-        "T60x_TI_VERTEX_WAIT",
-        "T60x_TI_PCACHE_WAIT",
-        "T60x_TI_WRBUF_WAIT",
-        "T60x_TI_BUS_READ",
-        "T60x_TI_BUS_WRITE",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "T60x_TI_UTLB_STALL",
-        "T60x_TI_UTLB_REPLAY_MISS",
-        "T60x_TI_UTLB_REPLAY_FULL",
-        "T60x_TI_UTLB_NEW_MISS",
-        "T60x_TI_UTLB_HIT",
+    /* Tiler */
+    "",
+    "",
+    "",
+    "T60x_TI_JOBS_PROCESSED",
+    "T60x_TI_TRIANGLES",
+    "T60x_TI_QUADS",
+    "T60x_TI_POLYGONS",
+    "T60x_TI_POINTS",
+    "T60x_TI_LINES",
+    "T60x_TI_VCACHE_HIT",
+    "T60x_TI_VCACHE_MISS",
+    "T60x_TI_FRONT_FACING",
+    "T60x_TI_BACK_FACING",
+    "T60x_TI_PRIM_VISIBLE",
+    "T60x_TI_PRIM_CULLED",
+    "T60x_TI_PRIM_CLIPPED",
+    "T60x_TI_LEVEL0",
+    "T60x_TI_LEVEL1",
+    "T60x_TI_LEVEL2",
+    "T60x_TI_LEVEL3",
+    "T60x_TI_LEVEL4",
+    "T60x_TI_LEVEL5",
+    "T60x_TI_LEVEL6",
+    "T60x_TI_LEVEL7",
+    "T60x_TI_COMMAND_1",
+    "T60x_TI_COMMAND_2",
+    "T60x_TI_COMMAND_3",
+    "T60x_TI_COMMAND_4",
+    "T60x_TI_COMMAND_4_7",
+    "T60x_TI_COMMAND_8_15",
+    "T60x_TI_COMMAND_16_63",
+    "T60x_TI_COMMAND_64",
+    "T60x_TI_COMPRESS_IN",
+    "T60x_TI_COMPRESS_OUT",
+    "T60x_TI_COMPRESS_FLUSH",
+    "T60x_TI_TIMESTAMPS",
+    "T60x_TI_PCACHE_HIT",
+    "T60x_TI_PCACHE_MISS",
+    "T60x_TI_PCACHE_LINE",
+    "T60x_TI_PCACHE_STALL",
+    "T60x_TI_WRBUF_HIT",
+    "T60x_TI_WRBUF_MISS",
+    "T60x_TI_WRBUF_LINE",
+    "T60x_TI_WRBUF_PARTIAL",
+    "T60x_TI_WRBUF_STALL",
+    "T60x_TI_ACTIVE",
+    "T60x_TI_LOADING_DESC",
+    "T60x_TI_INDEX_WAIT",
+    "T60x_TI_INDEX_RANGE_WAIT",
+    "T60x_TI_VERTEX_WAIT",
+    "T60x_TI_PCACHE_WAIT",
+    "T60x_TI_WRBUF_WAIT",
+    "T60x_TI_BUS_READ",
+    "T60x_TI_BUS_WRITE",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "T60x_TI_UTLB_STALL",
+    "T60x_TI_UTLB_REPLAY_MISS",
+    "T60x_TI_UTLB_REPLAY_FULL",
+    "T60x_TI_UTLB_NEW_MISS",
+    "T60x_TI_UTLB_HIT",
 
-        /* Shader Core */
-        "",
-        "",
-        "",
-        "",
-        "T60x_FRAG_ACTIVE",
-        "T60x_FRAG_PRIMITIVES",
-        "T60x_FRAG_PRIMITIVES_DROPPED",
-        "T60x_FRAG_CYCLES_DESC",
-        "T60x_FRAG_CYCLES_PLR",
-        "T60x_FRAG_CYCLES_VERT",
-        "T60x_FRAG_CYCLES_TRISETUP",
-        "T60x_FRAG_CYCLES_RAST",
-        "T60x_FRAG_THREADS",
-        "T60x_FRAG_DUMMY_THREADS",
-        "T60x_FRAG_QUADS_RAST",
-        "T60x_FRAG_QUADS_EZS_TEST",
-        "T60x_FRAG_QUADS_EZS_KILLED",
-        "T60x_FRAG_THREADS_LZS_TEST",
-        "T60x_FRAG_THREADS_LZS_KILLED",
-        "T60x_FRAG_CYCLES_NO_TILE",
-        "T60x_FRAG_NUM_TILES",
-        "T60x_FRAG_TRANS_ELIM",
-        "T60x_COMPUTE_ACTIVE",
-        "T60x_COMPUTE_TASKS",
-        "T60x_COMPUTE_THREADS",
-        "T60x_COMPUTE_CYCLES_DESC",
-        "T60x_TRIPIPE_ACTIVE",
-        "T60x_ARITH_WORDS",
-        "T60x_ARITH_CYCLES_REG",
-        "T60x_ARITH_CYCLES_L0",
-        "T60x_ARITH_FRAG_DEPEND",
-        "T60x_LS_WORDS",
-        "T60x_LS_ISSUES",
-        "T60x_LS_RESTARTS",
-        "T60x_LS_REISSUES_MISS",
-        "T60x_LS_REISSUES_VD",
-        "T60x_LS_REISSUE_ATTRIB_MISS",
-        "T60x_LS_NO_WB",
-        "T60x_TEX_WORDS",
-        "T60x_TEX_BUBBLES",
-        "T60x_TEX_WORDS_L0",
-        "T60x_TEX_WORDS_DESC",
-        "T60x_TEX_ISSUES",
-        "T60x_TEX_RECIRC_FMISS",
-        "T60x_TEX_RECIRC_DESC",
-        "T60x_TEX_RECIRC_MULTI",
-        "T60x_TEX_RECIRC_PMISS",
-        "T60x_TEX_RECIRC_CONF",
-        "T60x_LSC_READ_HITS",
-        "T60x_LSC_READ_MISSES",
-        "T60x_LSC_WRITE_HITS",
-        "T60x_LSC_WRITE_MISSES",
-        "T60x_LSC_ATOMIC_HITS",
-        "T60x_LSC_ATOMIC_MISSES",
-        "T60x_LSC_LINE_FETCHES",
-        "T60x_LSC_DIRTY_LINE",
-        "T60x_LSC_SNOOPS",
-        "T60x_AXI_TLB_STALL",
-        "T60x_AXI_TLB_MISS",
-        "T60x_AXI_TLB_TRANSACTION",
-        "T60x_LS_TLB_MISS",
-        "T60x_LS_TLB_HIT",
-        "T60x_AXI_BEATS_READ",
-        "T60x_AXI_BEATS_WRITTEN",
+    /* Shader Core */
+    "",
+    "",
+    "",
+    "",
+    "T60x_FRAG_ACTIVE",
+    "T60x_FRAG_PRIMITIVES",
+    "T60x_FRAG_PRIMITIVES_DROPPED",
+    "T60x_FRAG_CYCLES_DESC",
+    "T60x_FRAG_CYCLES_PLR",
+    "T60x_FRAG_CYCLES_VERT",
+    "T60x_FRAG_CYCLES_TRISETUP",
+    "T60x_FRAG_CYCLES_RAST",
+    "T60x_FRAG_THREADS",
+    "T60x_FRAG_DUMMY_THREADS",
+    "T60x_FRAG_QUADS_RAST",
+    "T60x_FRAG_QUADS_EZS_TEST",
+    "T60x_FRAG_QUADS_EZS_KILLED",
+    "T60x_FRAG_THREADS_LZS_TEST",
+    "T60x_FRAG_THREADS_LZS_KILLED",
+    "T60x_FRAG_CYCLES_NO_TILE",
+    "T60x_FRAG_NUM_TILES",
+    "T60x_FRAG_TRANS_ELIM",
+    "T60x_COMPUTE_ACTIVE",
+    "T60x_COMPUTE_TASKS",
+    "T60x_COMPUTE_THREADS",
+    "T60x_COMPUTE_CYCLES_DESC",
+    "T60x_TRIPIPE_ACTIVE",
+    "T60x_ARITH_WORDS",
+    "T60x_ARITH_CYCLES_REG",
+    "T60x_ARITH_CYCLES_L0",
+    "T60x_ARITH_FRAG_DEPEND",
+    "T60x_LS_WORDS",
+    "T60x_LS_ISSUES",
+    "T60x_LS_RESTARTS",
+    "T60x_LS_REISSUES_MISS",
+    "T60x_LS_REISSUES_VD",
+    "T60x_LS_REISSUE_ATTRIB_MISS",
+    "T60x_LS_NO_WB",
+    "T60x_TEX_WORDS",
+    "T60x_TEX_BUBBLES",
+    "T60x_TEX_WORDS_L0",
+    "T60x_TEX_WORDS_DESC",
+    "T60x_TEX_ISSUES",
+    "T60x_TEX_RECIRC_FMISS",
+    "T60x_TEX_RECIRC_DESC",
+    "T60x_TEX_RECIRC_MULTI",
+    "T60x_TEX_RECIRC_PMISS",
+    "T60x_TEX_RECIRC_CONF",
+    "T60x_LSC_READ_HITS",
+    "T60x_LSC_READ_MISSES",
+    "T60x_LSC_WRITE_HITS",
+    "T60x_LSC_WRITE_MISSES",
+    "T60x_LSC_ATOMIC_HITS",
+    "T60x_LSC_ATOMIC_MISSES",
+    "T60x_LSC_LINE_FETCHES",
+    "T60x_LSC_DIRTY_LINE",
+    "T60x_LSC_SNOOPS",
+    "T60x_AXI_TLB_STALL",
+    "T60x_AXI_TLB_MISS",
+    "T60x_AXI_TLB_TRANSACTION",
+    "T60x_LS_TLB_MISS",
+    "T60x_LS_TLB_HIT",
+    "T60x_AXI_BEATS_READ",
+    "T60x_AXI_BEATS_WRITTEN",
 
-        /*L2 and MMU */
-        "",
-        "",
-        "",
-        "",
-        "T60x_MMU_HIT",
-        "T60x_MMU_NEW_MISS",
-        "T60x_MMU_REPLAY_FULL",
-        "T60x_MMU_REPLAY_MISS",
-        "T60x_MMU_TABLE_WALK",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "T60x_UTLB_HIT",
-        "T60x_UTLB_NEW_MISS",
-        "T60x_UTLB_REPLAY_FULL",
-        "T60x_UTLB_REPLAY_MISS",
-        "T60x_UTLB_STALL",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "T60x_L2_EXT_WRITE_BEATS",
-        "T60x_L2_EXT_READ_BEATS",
-        "T60x_L2_ANY_LOOKUP",
-        "T60x_L2_READ_LOOKUP",
-        "T60x_L2_SREAD_LOOKUP",
-        "T60x_L2_READ_REPLAY",
-        "T60x_L2_READ_SNOOP",
-        "T60x_L2_READ_HIT",
-        "T60x_L2_CLEAN_MISS",
-        "T60x_L2_WRITE_LOOKUP",
-        "T60x_L2_SWRITE_LOOKUP",
-        "T60x_L2_WRITE_REPLAY",
-        "T60x_L2_WRITE_SNOOP",
-        "T60x_L2_WRITE_HIT",
-        "T60x_L2_EXT_READ_FULL",
-        "T60x_L2_EXT_READ_HALF",
-        "T60x_L2_EXT_WRITE_FULL",
-        "T60x_L2_EXT_WRITE_HALF",
-        "T60x_L2_EXT_READ",
-        "T60x_L2_EXT_READ_LINE",
-        "T60x_L2_EXT_WRITE",
-        "T60x_L2_EXT_WRITE_LINE",
-        "T60x_L2_EXT_WRITE_SMALL",
-        "T60x_L2_EXT_BARRIER",
-        "T60x_L2_EXT_AR_STALL",
-        "T60x_L2_EXT_R_BUF_FULL",
-        "T60x_L2_EXT_RD_BUF_FULL",
-        "T60x_L2_EXT_R_RAW",
-        "T60x_L2_EXT_W_STALL",
-        "T60x_L2_EXT_W_BUF_FULL",
-        "T60x_L2_EXT_R_W_HAZARD",
-        "T60x_L2_TAG_HAZARD",
-        "T60x_L2_SNOOP_FULL",
-        "T60x_L2_REPLAY_FULL"
-    };
-    static const char * const hardware_counters_mali_t62x[] = {
-        /* Job Manager */
-        "",
-        "",
-        "",
-        "",
-        "T62x_MESSAGES_SENT",
-        "T62x_MESSAGES_RECEIVED",
-        "T62x_GPU_ACTIVE",
-        "T62x_IRQ_ACTIVE",
-        "T62x_JS0_JOBS",
-        "T62x_JS0_TASKS",
-        "T62x_JS0_ACTIVE",
-        "",
-        "T62x_JS0_WAIT_READ",
-        "T62x_JS0_WAIT_ISSUE",
-        "T62x_JS0_WAIT_DEPEND",
-        "T62x_JS0_WAIT_FINISH",
-        "T62x_JS1_JOBS",
-        "T62x_JS1_TASKS",
-        "T62x_JS1_ACTIVE",
-        "",
-        "T62x_JS1_WAIT_READ",
-        "T62x_JS1_WAIT_ISSUE",
-        "T62x_JS1_WAIT_DEPEND",
-        "T62x_JS1_WAIT_FINISH",
-        "T62x_JS2_JOBS",
-        "T62x_JS2_TASKS",
-        "T62x_JS2_ACTIVE",
-        "",
-        "T62x_JS2_WAIT_READ",
-        "T62x_JS2_WAIT_ISSUE",
-        "T62x_JS2_WAIT_DEPEND",
-        "T62x_JS2_WAIT_FINISH",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
+    /* L2 and MMU */
+    "",
+    "",
+    "",
+    "",
+    "T60x_MMU_HIT",
+    "T60x_MMU_NEW_MISS",
+    "T60x_MMU_REPLAY_FULL",
+    "T60x_MMU_REPLAY_MISS",
+    "T60x_MMU_TABLE_WALK",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "T60x_UTLB_HIT",
+    "T60x_UTLB_NEW_MISS",
+    "T60x_UTLB_REPLAY_FULL",
+    "T60x_UTLB_REPLAY_MISS",
+    "T60x_UTLB_STALL",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "T60x_L2_EXT_WRITE_BEATS",
+    "T60x_L2_EXT_READ_BEATS",
+    "T60x_L2_ANY_LOOKUP",
+    "T60x_L2_READ_LOOKUP",
+    "T60x_L2_SREAD_LOOKUP",
+    "T60x_L2_READ_REPLAY",
+    "T60x_L2_READ_SNOOP",
+    "T60x_L2_READ_HIT",
+    "T60x_L2_CLEAN_MISS",
+    "T60x_L2_WRITE_LOOKUP",
+    "T60x_L2_SWRITE_LOOKUP",
+    "T60x_L2_WRITE_REPLAY",
+    "T60x_L2_WRITE_SNOOP",
+    "T60x_L2_WRITE_HIT",
+    "T60x_L2_EXT_READ_FULL",
+    "T60x_L2_EXT_READ_HALF",
+    "T60x_L2_EXT_WRITE_FULL",
+    "T60x_L2_EXT_WRITE_HALF",
+    "T60x_L2_EXT_READ",
+    "T60x_L2_EXT_READ_LINE",
+    "T60x_L2_EXT_WRITE",
+    "T60x_L2_EXT_WRITE_LINE",
+    "T60x_L2_EXT_WRITE_SMALL",
+    "T60x_L2_EXT_BARRIER",
+    "T60x_L2_EXT_AR_STALL",
+    "T60x_L2_EXT_R_BUF_FULL",
+    "T60x_L2_EXT_RD_BUF_FULL",
+    "T60x_L2_EXT_R_RAW",
+    "T60x_L2_EXT_W_STALL",
+    "T60x_L2_EXT_W_BUF_FULL",
+    "T60x_L2_EXT_R_W_HAZARD",
+    "T60x_L2_TAG_HAZARD",
+    "T60x_L2_SNOOP_FULL",
+    "T60x_L2_REPLAY_FULL"
+};
+static const char *const hardware_counters_mali_t62x[] =
+{
+    /* Job Manager */
+    "",
+    "",
+    "",
+    "",
+    "T62x_MESSAGES_SENT",
+    "T62x_MESSAGES_RECEIVED",
+    "T62x_GPU_ACTIVE",
+    "T62x_IRQ_ACTIVE",
+    "T62x_JS0_JOBS",
+    "T62x_JS0_TASKS",
+    "T62x_JS0_ACTIVE",
+    "",
+    "T62x_JS0_WAIT_READ",
+    "T62x_JS0_WAIT_ISSUE",
+    "T62x_JS0_WAIT_DEPEND",
+    "T62x_JS0_WAIT_FINISH",
+    "T62x_JS1_JOBS",
+    "T62x_JS1_TASKS",
+    "T62x_JS1_ACTIVE",
+    "",
+    "T62x_JS1_WAIT_READ",
+    "T62x_JS1_WAIT_ISSUE",
+    "T62x_JS1_WAIT_DEPEND",
+    "T62x_JS1_WAIT_FINISH",
+    "T62x_JS2_JOBS",
+    "T62x_JS2_TASKS",
+    "T62x_JS2_ACTIVE",
+    "",
+    "T62x_JS2_WAIT_READ",
+    "T62x_JS2_WAIT_ISSUE",
+    "T62x_JS2_WAIT_DEPEND",
+    "T62x_JS2_WAIT_FINISH",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
 
-        /*Tiler */
-        "",
-        "",
-        "",
-        "T62x_TI_JOBS_PROCESSED",
-        "T62x_TI_TRIANGLES",
-        "T62x_TI_QUADS",
-        "T62x_TI_POLYGONS",
-        "T62x_TI_POINTS",
-        "T62x_TI_LINES",
-        "T62x_TI_VCACHE_HIT",
-        "T62x_TI_VCACHE_MISS",
-        "T62x_TI_FRONT_FACING",
-        "T62x_TI_BACK_FACING",
-        "T62x_TI_PRIM_VISIBLE",
-        "T62x_TI_PRIM_CULLED",
-        "T62x_TI_PRIM_CLIPPED",
-        "T62x_TI_LEVEL0",
-        "T62x_TI_LEVEL1",
-        "T62x_TI_LEVEL2",
-        "T62x_TI_LEVEL3",
-        "T62x_TI_LEVEL4",
-        "T62x_TI_LEVEL5",
-        "T62x_TI_LEVEL6",
-        "T62x_TI_LEVEL7",
-        "T62x_TI_COMMAND_1",
-        "T62x_TI_COMMAND_2",
-        "T62x_TI_COMMAND_3",
-        "T62x_TI_COMMAND_4",
-        "T62x_TI_COMMAND_5_7",
-        "T62x_TI_COMMAND_8_15",
-        "T62x_TI_COMMAND_16_63",
-        "T62x_TI_COMMAND_64",
-        "T62x_TI_COMPRESS_IN",
-        "T62x_TI_COMPRESS_OUT",
-        "T62x_TI_COMPRESS_FLUSH",
-        "T62x_TI_TIMESTAMPS",
-        "T62x_TI_PCACHE_HIT",
-        "T62x_TI_PCACHE_MISS",
-        "T62x_TI_PCACHE_LINE",
-        "T62x_TI_PCACHE_STALL",
-        "T62x_TI_WRBUF_HIT",
-        "T62x_TI_WRBUF_MISS",
-        "T62x_TI_WRBUF_LINE",
-        "T62x_TI_WRBUF_PARTIAL",
-        "T62x_TI_WRBUF_STALL",
-        "T62x_TI_ACTIVE",
-        "T62x_TI_LOADING_DESC",
-        "T62x_TI_INDEX_WAIT",
-        "T62x_TI_INDEX_RANGE_WAIT",
-        "T62x_TI_VERTEX_WAIT",
-        "T62x_TI_PCACHE_WAIT",
-        "T62x_TI_WRBUF_WAIT",
-        "T62x_TI_BUS_READ",
-        "T62x_TI_BUS_WRITE",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "T62x_TI_UTLB_STALL",
-        "T62x_TI_UTLB_REPLAY_MISS",
-        "T62x_TI_UTLB_REPLAY_FULL",
-        "T62x_TI_UTLB_NEW_MISS",
-        "T62x_TI_UTLB_HIT",
+    /* Tiler */
+    "",
+    "",
+    "",
+    "T62x_TI_JOBS_PROCESSED",
+    "T62x_TI_TRIANGLES",
+    "T62x_TI_QUADS",
+    "T62x_TI_POLYGONS",
+    "T62x_TI_POINTS",
+    "T62x_TI_LINES",
+    "T62x_TI_VCACHE_HIT",
+    "T62x_TI_VCACHE_MISS",
+    "T62x_TI_FRONT_FACING",
+    "T62x_TI_BACK_FACING",
+    "T62x_TI_PRIM_VISIBLE",
+    "T62x_TI_PRIM_CULLED",
+    "T62x_TI_PRIM_CLIPPED",
+    "T62x_TI_LEVEL0",
+    "T62x_TI_LEVEL1",
+    "T62x_TI_LEVEL2",
+    "T62x_TI_LEVEL3",
+    "T62x_TI_LEVEL4",
+    "T62x_TI_LEVEL5",
+    "T62x_TI_LEVEL6",
+    "T62x_TI_LEVEL7",
+    "T62x_TI_COMMAND_1",
+    "T62x_TI_COMMAND_2",
+    "T62x_TI_COMMAND_3",
+    "T62x_TI_COMMAND_4",
+    "T62x_TI_COMMAND_5_7",
+    "T62x_TI_COMMAND_8_15",
+    "T62x_TI_COMMAND_16_63",
+    "T62x_TI_COMMAND_64",
+    "T62x_TI_COMPRESS_IN",
+    "T62x_TI_COMPRESS_OUT",
+    "T62x_TI_COMPRESS_FLUSH",
+    "T62x_TI_TIMESTAMPS",
+    "T62x_TI_PCACHE_HIT",
+    "T62x_TI_PCACHE_MISS",
+    "T62x_TI_PCACHE_LINE",
+    "T62x_TI_PCACHE_STALL",
+    "T62x_TI_WRBUF_HIT",
+    "T62x_TI_WRBUF_MISS",
+    "T62x_TI_WRBUF_LINE",
+    "T62x_TI_WRBUF_PARTIAL",
+    "T62x_TI_WRBUF_STALL",
+    "T62x_TI_ACTIVE",
+    "T62x_TI_LOADING_DESC",
+    "T62x_TI_INDEX_WAIT",
+    "T62x_TI_INDEX_RANGE_WAIT",
+    "T62x_TI_VERTEX_WAIT",
+    "T62x_TI_PCACHE_WAIT",
+    "T62x_TI_WRBUF_WAIT",
+    "T62x_TI_BUS_READ",
+    "T62x_TI_BUS_WRITE",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "T62x_TI_UTLB_STALL",
+    "T62x_TI_UTLB_REPLAY_MISS",
+    "T62x_TI_UTLB_REPLAY_FULL",
+    "T62x_TI_UTLB_NEW_MISS",
+    "T62x_TI_UTLB_HIT",
 
-        /* Shader Core */
-        "",
-        "",
-        "",
-        "T62x_SHADER_CORE_ACTIVE",
-        "T62x_FRAG_ACTIVE",
-        "T62x_FRAG_PRIMITIVES",
-        "T62x_FRAG_PRIMITIVES_DROPPED",
-        "T62x_FRAG_CYCLES_DESC",
-        "T62x_FRAG_CYCLES_FPKQ_ACTIVE",
-        "T62x_FRAG_CYCLES_VERT",
-        "T62x_FRAG_CYCLES_TRISETUP",
-        "T62x_FRAG_CYCLES_EZS_ACTIVE",
-        "T62x_FRAG_THREADS",
-        "T62x_FRAG_DUMMY_THREADS",
-        "T62x_FRAG_QUADS_RAST",
-        "T62x_FRAG_QUADS_EZS_TEST",
-        "T62x_FRAG_QUADS_EZS_KILLED",
-        "T62x_FRAG_THREADS_LZS_TEST",
-        "T62x_FRAG_THREADS_LZS_KILLED",
-        "T62x_FRAG_CYCLES_NO_TILE",
-        "T62x_FRAG_NUM_TILES",
-        "T62x_FRAG_TRANS_ELIM",
-        "T62x_COMPUTE_ACTIVE",
-        "T62x_COMPUTE_TASKS",
-        "T62x_COMPUTE_THREADS",
-        "T62x_COMPUTE_CYCLES_DESC",
-        "T62x_TRIPIPE_ACTIVE",
-        "T62x_ARITH_WORDS",
-        "T62x_ARITH_CYCLES_REG",
-        "T62x_ARITH_CYCLES_L0",
-        "T62x_ARITH_FRAG_DEPEND",
-        "T62x_LS_WORDS",
-        "T62x_LS_ISSUES",
-        "T62x_LS_RESTARTS",
-        "T62x_LS_REISSUES_MISS",
-        "T62x_LS_REISSUES_VD",
-        "T62x_LS_REISSUE_ATTRIB_MISS",
-        "T62x_LS_NO_WB",
-        "T62x_TEX_WORDS",
-        "T62x_TEX_BUBBLES",
-        "T62x_TEX_WORDS_L0",
-        "T62x_TEX_WORDS_DESC",
-        "T62x_TEX_ISSUES",
-        "T62x_TEX_RECIRC_FMISS",
-        "T62x_TEX_RECIRC_DESC",
-        "T62x_TEX_RECIRC_MULTI",
-        "T62x_TEX_RECIRC_PMISS",
-        "T62x_TEX_RECIRC_CONF",
-        "T62x_LSC_READ_HITS",
-        "T62x_LSC_READ_MISSES",
-        "T62x_LSC_WRITE_HITS",
-        "T62x_LSC_WRITE_MISSES",
-        "T62x_LSC_ATOMIC_HITS",
-        "T62x_LSC_ATOMIC_MISSES",
-        "T62x_LSC_LINE_FETCHES",
-        "T62x_LSC_DIRTY_LINE",
-        "T62x_LSC_SNOOPS",
-        "T62x_AXI_TLB_STALL",
-        "T62x_AXI_TLB_MISS",
-        "T62x_AXI_TLB_TRANSACTION",
-        "T62x_LS_TLB_MISS",
-        "T62x_LS_TLB_HIT",
-        "T62x_AXI_BEATS_READ",
-        "T62x_AXI_BEATS_WRITTEN",
+    /* Shader Core */
+    "",
+    "",
+    "",
+    "T62x_SHADER_CORE_ACTIVE",
+    "T62x_FRAG_ACTIVE",
+    "T62x_FRAG_PRIMITIVES",
+    "T62x_FRAG_PRIMITIVES_DROPPED",
+    "T62x_FRAG_CYCLES_DESC",
+    "T62x_FRAG_CYCLES_FPKQ_ACTIVE",
+    "T62x_FRAG_CYCLES_VERT",
+    "T62x_FRAG_CYCLES_TRISETUP",
+    "T62x_FRAG_CYCLES_EZS_ACTIVE",
+    "T62x_FRAG_THREADS",
+    "T62x_FRAG_DUMMY_THREADS",
+    "T62x_FRAG_QUADS_RAST",
+    "T62x_FRAG_QUADS_EZS_TEST",
+    "T62x_FRAG_QUADS_EZS_KILLED",
+    "T62x_FRAG_THREADS_LZS_TEST",
+    "T62x_FRAG_THREADS_LZS_KILLED",
+    "T62x_FRAG_CYCLES_NO_TILE",
+    "T62x_FRAG_NUM_TILES",
+    "T62x_FRAG_TRANS_ELIM",
+    "T62x_COMPUTE_ACTIVE",
+    "T62x_COMPUTE_TASKS",
+    "T62x_COMPUTE_THREADS",
+    "T62x_COMPUTE_CYCLES_DESC",
+    "T62x_TRIPIPE_ACTIVE",
+    "T62x_ARITH_WORDS",
+    "T62x_ARITH_CYCLES_REG",
+    "T62x_ARITH_CYCLES_L0",
+    "T62x_ARITH_FRAG_DEPEND",
+    "T62x_LS_WORDS",
+    "T62x_LS_ISSUES",
+    "T62x_LS_RESTARTS",
+    "T62x_LS_REISSUES_MISS",
+    "T62x_LS_REISSUES_VD",
+    "T62x_LS_REISSUE_ATTRIB_MISS",
+    "T62x_LS_NO_WB",
+    "T62x_TEX_WORDS",
+    "T62x_TEX_BUBBLES",
+    "T62x_TEX_WORDS_L0",
+    "T62x_TEX_WORDS_DESC",
+    "T62x_TEX_ISSUES",
+    "T62x_TEX_RECIRC_FMISS",
+    "T62x_TEX_RECIRC_DESC",
+    "T62x_TEX_RECIRC_MULTI",
+    "T62x_TEX_RECIRC_PMISS",
+    "T62x_TEX_RECIRC_CONF",
+    "T62x_LSC_READ_HITS",
+    "T62x_LSC_READ_MISSES",
+    "T62x_LSC_WRITE_HITS",
+    "T62x_LSC_WRITE_MISSES",
+    "T62x_LSC_ATOMIC_HITS",
+    "T62x_LSC_ATOMIC_MISSES",
+    "T62x_LSC_LINE_FETCHES",
+    "T62x_LSC_DIRTY_LINE",
+    "T62x_LSC_SNOOPS",
+    "T62x_AXI_TLB_STALL",
+    "T62x_AXI_TLB_MISS",
+    "T62x_AXI_TLB_TRANSACTION",
+    "T62x_LS_TLB_MISS",
+    "T62x_LS_TLB_HIT",
+    "T62x_AXI_BEATS_READ",
+    "T62x_AXI_BEATS_WRITTEN",
 
-        /*L2 and MMU */
-        "",
-        "",
-        "",
-        "",
-        "T62x_MMU_HIT",
-        "T62x_MMU_NEW_MISS",
-        "T62x_MMU_REPLAY_FULL",
-        "T62x_MMU_REPLAY_MISS",
-        "T62x_MMU_TABLE_WALK",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "T62x_UTLB_HIT",
-        "T62x_UTLB_NEW_MISS",
-        "T62x_UTLB_REPLAY_FULL",
-        "T62x_UTLB_REPLAY_MISS",
-        "T62x_UTLB_STALL",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "T62x_L2_EXT_WRITE_BEATS",
-        "T62x_L2_EXT_READ_BEATS",
-        "T62x_L2_ANY_LOOKUP",
-        "T62x_L2_READ_LOOKUP",
-        "T62x_L2_SREAD_LOOKUP",
-        "T62x_L2_READ_REPLAY",
-        "T62x_L2_READ_SNOOP",
-        "T62x_L2_READ_HIT",
-        "T62x_L2_CLEAN_MISS",
-        "T62x_L2_WRITE_LOOKUP",
-        "T62x_L2_SWRITE_LOOKUP",
-        "T62x_L2_WRITE_REPLAY",
-        "T62x_L2_WRITE_SNOOP",
-        "T62x_L2_WRITE_HIT",
-        "T62x_L2_EXT_READ_FULL",
-        "T62x_L2_EXT_READ_HALF",
-        "T62x_L2_EXT_WRITE_FULL",
-        "T62x_L2_EXT_WRITE_HALF",
-        "T62x_L2_EXT_READ",
-        "T62x_L2_EXT_READ_LINE",
-        "T62x_L2_EXT_WRITE",
-        "T62x_L2_EXT_WRITE_LINE",
-        "T62x_L2_EXT_WRITE_SMALL",
-        "T62x_L2_EXT_BARRIER",
-        "T62x_L2_EXT_AR_STALL",
-        "T62x_L2_EXT_R_BUF_FULL",
-        "T62x_L2_EXT_RD_BUF_FULL",
-        "T62x_L2_EXT_R_RAW",
-        "T62x_L2_EXT_W_STALL",
-        "T62x_L2_EXT_W_BUF_FULL",
-        "T62x_L2_EXT_R_W_HAZARD",
-        "T62x_L2_TAG_HAZARD",
-        "T62x_L2_SNOOP_FULL",
-        "T62x_L2_REPLAY_FULL"
-    };
+    /* L2 and MMU */
+    "",
+    "",
+    "",
+    "",
+    "T62x_MMU_HIT",
+    "T62x_MMU_NEW_MISS",
+    "T62x_MMU_REPLAY_FULL",
+    "T62x_MMU_REPLAY_MISS",
+    "T62x_MMU_TABLE_WALK",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "T62x_UTLB_HIT",
+    "T62x_UTLB_NEW_MISS",
+    "T62x_UTLB_REPLAY_FULL",
+    "T62x_UTLB_REPLAY_MISS",
+    "T62x_UTLB_STALL",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "T62x_L2_EXT_WRITE_BEATS",
+    "T62x_L2_EXT_READ_BEATS",
+    "T62x_L2_ANY_LOOKUP",
+    "T62x_L2_READ_LOOKUP",
+    "T62x_L2_SREAD_LOOKUP",
+    "T62x_L2_READ_REPLAY",
+    "T62x_L2_READ_SNOOP",
+    "T62x_L2_READ_HIT",
+    "T62x_L2_CLEAN_MISS",
+    "T62x_L2_WRITE_LOOKUP",
+    "T62x_L2_SWRITE_LOOKUP",
+    "T62x_L2_WRITE_REPLAY",
+    "T62x_L2_WRITE_SNOOP",
+    "T62x_L2_WRITE_HIT",
+    "T62x_L2_EXT_READ_FULL",
+    "T62x_L2_EXT_READ_HALF",
+    "T62x_L2_EXT_WRITE_FULL",
+    "T62x_L2_EXT_WRITE_HALF",
+    "T62x_L2_EXT_READ",
+    "T62x_L2_EXT_READ_LINE",
+    "T62x_L2_EXT_WRITE",
+    "T62x_L2_EXT_WRITE_LINE",
+    "T62x_L2_EXT_WRITE_SMALL",
+    "T62x_L2_EXT_BARRIER",
+    "T62x_L2_EXT_AR_STALL",
+    "T62x_L2_EXT_R_BUF_FULL",
+    "T62x_L2_EXT_RD_BUF_FULL",
+    "T62x_L2_EXT_R_RAW",
+    "T62x_L2_EXT_W_STALL",
+    "T62x_L2_EXT_W_BUF_FULL",
+    "T62x_L2_EXT_R_W_HAZARD",
+    "T62x_L2_TAG_HAZARD",
+    "T62x_L2_SNOOP_FULL",
+    "T62x_L2_REPLAY_FULL"
+};
 
-    static const char * const hardware_counters_mali_t72x[] = {
-        /* Job Manager */
-        "",
-        "",
-        "",
-        "",
-        "T72x_GPU_ACTIVE",
-        "T72x_IRQ_ACTIVE",
-        "T72x_JS0_JOBS",
-        "T72x_JS0_TASKS",
-        "T72x_JS0_ACTIVE",
-        "T72x_JS1_JOBS",
-        "T72x_JS1_TASKS",
-        "T72x_JS1_ACTIVE",
-        "T72x_JS2_JOBS",
-        "T72x_JS2_TASKS",
-        "T72x_JS2_ACTIVE",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
+static const char *const hardware_counters_mali_t72x[] =
+{
+    /* Job Manager */
+    "",
+    "",
+    "",
+    "",
+    "T72x_GPU_ACTIVE",
+    "T72x_IRQ_ACTIVE",
+    "T72x_JS0_JOBS",
+    "T72x_JS0_TASKS",
+    "T72x_JS0_ACTIVE",
+    "T72x_JS1_JOBS",
+    "T72x_JS1_TASKS",
+    "T72x_JS1_ACTIVE",
+    "T72x_JS2_JOBS",
+    "T72x_JS2_TASKS",
+    "T72x_JS2_ACTIVE",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
 
-        /*Tiler */
-        "",
-        "",
-        "",
-        "T72x_TI_JOBS_PROCESSED",
-        "T72x_TI_TRIANGLES",
-        "T72x_TI_QUADS",
-        "T72x_TI_POLYGONS",
-        "T72x_TI_POINTS",
-        "T72x_TI_LINES",
-        "T72x_TI_FRONT_FACING",
-        "T72x_TI_BACK_FACING",
-        "T72x_TI_PRIM_VISIBLE",
-        "T72x_TI_PRIM_CULLED",
-        "T72x_TI_PRIM_CLIPPED",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "T72x_TI_ACTIVE",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
+    /*Tiler */
+    "",
+    "",
+    "",
+    "T72x_TI_JOBS_PROCESSED",
+    "T72x_TI_TRIANGLES",
+    "T72x_TI_QUADS",
+    "T72x_TI_POLYGONS",
+    "T72x_TI_POINTS",
+    "T72x_TI_LINES",
+    "T72x_TI_FRONT_FACING",
+    "T72x_TI_BACK_FACING",
+    "T72x_TI_PRIM_VISIBLE",
+    "T72x_TI_PRIM_CULLED",
+    "T72x_TI_PRIM_CLIPPED",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "T72x_TI_ACTIVE",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
 
-        /* Shader Core */
-        "",
-        "",
-        "",
-        "",
-        "T72x_FRAG_ACTIVE",
-        "T72x_FRAG_PRIMITIVES",
-        "T72x_FRAG_PRIMITIVES_DROPPED",
-        "T72x_FRAG_THREADS",
-        "T72x_FRAG_DUMMY_THREADS",
-        "T72x_FRAG_QUADS_RAST",
-        "T72x_FRAG_QUADS_EZS_TEST",
-        "T72x_FRAG_QUADS_EZS_KILLED",
-        "T72x_FRAG_THREADS_LZS_TEST",
-        "T72x_FRAG_THREADS_LZS_KILLED",
-        "T72x_FRAG_CYCLES_NO_TILE",
-        "T72x_FRAG_NUM_TILES",
-        "T72x_FRAG_TRANS_ELIM",
-        "T72x_COMPUTE_ACTIVE",
-        "T72x_COMPUTE_TASKS",
-        "T72x_COMPUTE_THREADS",
-        "T72x_TRIPIPE_ACTIVE",
-        "T72x_ARITH_WORDS",
-        "T72x_ARITH_CYCLES_REG",
-        "T72x_LS_WORDS",
-        "T72x_LS_ISSUES",
-        "T72x_LS_RESTARTS",
-        "T72x_LS_REISSUES_MISS",
-        "T72x_TEX_WORDS",
-        "T72x_TEX_BUBBLES",
-        "T72x_TEX_ISSUES",
-        "T72x_LSC_READ_HITS",
-        "T72x_LSC_READ_MISSES",
-        "T72x_LSC_WRITE_HITS",
-        "T72x_LSC_WRITE_MISSES",
-        "T72x_LSC_ATOMIC_HITS",
-        "T72x_LSC_ATOMIC_MISSES",
-        "T72x_LSC_LINE_FETCHES",
-        "T72x_LSC_DIRTY_LINE",
-        "T72x_LSC_SNOOPS",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
+    /* Shader Core */
+    "",
+    "",
+    "",
+    "",
+    "T72x_FRAG_ACTIVE",
+    "T72x_FRAG_PRIMITIVES",
+    "T72x_FRAG_PRIMITIVES_DROPPED",
+    "T72x_FRAG_THREADS",
+    "T72x_FRAG_DUMMY_THREADS",
+    "T72x_FRAG_QUADS_RAST",
+    "T72x_FRAG_QUADS_EZS_TEST",
+    "T72x_FRAG_QUADS_EZS_KILLED",
+    "T72x_FRAG_THREADS_LZS_TEST",
+    "T72x_FRAG_THREADS_LZS_KILLED",
+    "T72x_FRAG_CYCLES_NO_TILE",
+    "T72x_FRAG_NUM_TILES",
+    "T72x_FRAG_TRANS_ELIM",
+    "T72x_COMPUTE_ACTIVE",
+    "T72x_COMPUTE_TASKS",
+    "T72x_COMPUTE_THREADS",
+    "T72x_TRIPIPE_ACTIVE",
+    "T72x_ARITH_WORDS",
+    "T72x_ARITH_CYCLES_REG",
+    "T72x_LS_WORDS",
+    "T72x_LS_ISSUES",
+    "T72x_LS_RESTARTS",
+    "T72x_LS_REISSUES_MISS",
+    "T72x_TEX_WORDS",
+    "T72x_TEX_BUBBLES",
+    "T72x_TEX_ISSUES",
+    "T72x_LSC_READ_HITS",
+    "T72x_LSC_READ_MISSES",
+    "T72x_LSC_WRITE_HITS",
+    "T72x_LSC_WRITE_MISSES",
+    "T72x_LSC_ATOMIC_HITS",
+    "T72x_LSC_ATOMIC_MISSES",
+    "T72x_LSC_LINE_FETCHES",
+    "T72x_LSC_DIRTY_LINE",
+    "T72x_LSC_SNOOPS",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
 
-        /*L2 and MMU */
-        "",
-        "",
-        "",
-        "",
-        "T72x_L2_EXT_WRITE_BEAT",
-        "T72x_L2_EXT_READ_BEAT",
-        "T72x_L2_READ_SNOOP",
-        "T72x_L2_READ_HIT",
-        "T72x_L2_WRITE_SNOOP",
-        "T72x_L2_WRITE_HIT",
-        "T72x_L2_EXT_WRITE_SMALL",
-        "T72x_L2_EXT_BARRIER",
-        "T72x_L2_EXT_AR_STALL",
-        "T72x_L2_EXT_W_STALL",
-        "T72x_L2_SNOOP_FULL",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        ""
-    };
+    /*L2 and MMU */
+    "",
+    "",
+    "",
+    "",
+    "T72x_L2_EXT_WRITE_BEAT",
+    "T72x_L2_EXT_READ_BEAT",
+    "T72x_L2_READ_SNOOP",
+    "T72x_L2_READ_HIT",
+    "T72x_L2_WRITE_SNOOP",
+    "T72x_L2_WRITE_HIT",
+    "T72x_L2_EXT_WRITE_SMALL",
+    "T72x_L2_EXT_BARRIER",
+    "T72x_L2_EXT_AR_STALL",
+    "T72x_L2_EXT_W_STALL",
+    "T72x_L2_SNOOP_FULL",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    ""
+};
 
-    static const char * const hardware_counters_mali_t76x[] = {
-        /* Job Manager */
-        "",
-        "",
-        "",
-        "",
-        "T76x_MESSAGES_SENT",
-        "T76x_MESSAGES_RECEIVED",
-        "T76x_GPU_ACTIVE",
-        "T76x_IRQ_ACTIVE",
-        "T76x_JS0_JOBS",
-        "T76x_JS0_TASKS",
-        "T76x_JS0_ACTIVE",
-        "",
-        "T76x_JS0_WAIT_READ",
-        "T76x_JS0_WAIT_ISSUE",
-        "T76x_JS0_WAIT_DEPEND",
-        "T76x_JS0_WAIT_FINISH",
-        "T76x_JS1_JOBS",
-        "T76x_JS1_TASKS",
-        "T76x_JS1_ACTIVE",
-        "",
-        "T76x_JS1_WAIT_READ",
-        "T76x_JS1_WAIT_ISSUE",
-        "T76x_JS1_WAIT_DEPEND",
-        "T76x_JS1_WAIT_FINISH",
-        "T76x_JS2_JOBS",
-        "T76x_JS2_TASKS",
-        "T76x_JS2_ACTIVE",
-        "",
-        "T76x_JS2_WAIT_READ",
-        "T76x_JS2_WAIT_ISSUE",
-        "T76x_JS2_WAIT_DEPEND",
-        "T76x_JS2_WAIT_FINISH",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
+static const char *const hardware_counters_mali_t76x[] =
+{
+    /* Job Manager */
+    "",
+    "",
+    "",
+    "",
+    "T76x_MESSAGES_SENT",
+    "T76x_MESSAGES_RECEIVED",
+    "T76x_GPU_ACTIVE",
+    "T76x_IRQ_ACTIVE",
+    "T76x_JS0_JOBS",
+    "T76x_JS0_TASKS",
+    "T76x_JS0_ACTIVE",
+    "",
+    "T76x_JS0_WAIT_READ",
+    "T76x_JS0_WAIT_ISSUE",
+    "T76x_JS0_WAIT_DEPEND",
+    "T76x_JS0_WAIT_FINISH",
+    "T76x_JS1_JOBS",
+    "T76x_JS1_TASKS",
+    "T76x_JS1_ACTIVE",
+    "",
+    "T76x_JS1_WAIT_READ",
+    "T76x_JS1_WAIT_ISSUE",
+    "T76x_JS1_WAIT_DEPEND",
+    "T76x_JS1_WAIT_FINISH",
+    "T76x_JS2_JOBS",
+    "T76x_JS2_TASKS",
+    "T76x_JS2_ACTIVE",
+    "",
+    "T76x_JS2_WAIT_READ",
+    "T76x_JS2_WAIT_ISSUE",
+    "T76x_JS2_WAIT_DEPEND",
+    "T76x_JS2_WAIT_FINISH",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
 
-        /*Tiler */
-        "",
-        "",
-        "",
-        "T76x_TI_JOBS_PROCESSED",
-        "T76x_TI_TRIANGLES",
-        "T76x_TI_QUADS",
-        "T76x_TI_POLYGONS",
-        "T76x_TI_POINTS",
-        "T76x_TI_LINES",
-        "T76x_TI_VCACHE_HIT",
-        "T76x_TI_VCACHE_MISS",
-        "T76x_TI_FRONT_FACING",
-        "T76x_TI_BACK_FACING",
-        "T76x_TI_PRIM_VISIBLE",
-        "T76x_TI_PRIM_CULLED",
-        "T76x_TI_PRIM_CLIPPED",
-        "T76x_TI_LEVEL0",
-        "T76x_TI_LEVEL1",
-        "T76x_TI_LEVEL2",
-        "T76x_TI_LEVEL3",
-        "T76x_TI_LEVEL4",
-        "T76x_TI_LEVEL5",
-        "T76x_TI_LEVEL6",
-        "T76x_TI_LEVEL7",
-        "T76x_TI_COMMAND_1",
-        "T76x_TI_COMMAND_2",
-        "T76x_TI_COMMAND_3",
-        "T76x_TI_COMMAND_4",
-        "T76x_TI_COMMAND_5_7",
-        "T76x_TI_COMMAND_8_15",
-        "T76x_TI_COMMAND_16_63",
-        "T76x_TI_COMMAND_64",
-        "T76x_TI_COMPRESS_IN",
-        "T76x_TI_COMPRESS_OUT",
-        "T76x_TI_COMPRESS_FLUSH",
-        "T76x_TI_TIMESTAMPS",
-        "T76x_TI_PCACHE_HIT",
-        "T76x_TI_PCACHE_MISS",
-        "T76x_TI_PCACHE_LINE",
-        "T76x_TI_PCACHE_STALL",
-        "T76x_TI_WRBUF_HIT",
-        "T76x_TI_WRBUF_MISS",
-        "T76x_TI_WRBUF_LINE",
-        "T76x_TI_WRBUF_PARTIAL",
-        "T76x_TI_WRBUF_STALL",
-        "T76x_TI_ACTIVE",
-        "T76x_TI_LOADING_DESC",
-        "T76x_TI_INDEX_WAIT",
-        "T76x_TI_INDEX_RANGE_WAIT",
-        "T76x_TI_VERTEX_WAIT",
-        "T76x_TI_PCACHE_WAIT",
-        "T76x_TI_WRBUF_WAIT",
-        "T76x_TI_BUS_READ",
-        "T76x_TI_BUS_WRITE",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "T76x_TI_UTLB_HIT",
-        "T76x_TI_UTLB_NEW_MISS",
-        "T76x_TI_UTLB_REPLAY_FULL",
-        "T76x_TI_UTLB_REPLAY_MISS",
-        "T76x_TI_UTLB_STALL",
+    /*Tiler */
+    "",
+    "",
+    "",
+    "T76x_TI_JOBS_PROCESSED",
+    "T76x_TI_TRIANGLES",
+    "T76x_TI_QUADS",
+    "T76x_TI_POLYGONS",
+    "T76x_TI_POINTS",
+    "T76x_TI_LINES",
+    "T76x_TI_VCACHE_HIT",
+    "T76x_TI_VCACHE_MISS",
+    "T76x_TI_FRONT_FACING",
+    "T76x_TI_BACK_FACING",
+    "T76x_TI_PRIM_VISIBLE",
+    "T76x_TI_PRIM_CULLED",
+    "T76x_TI_PRIM_CLIPPED",
+    "T76x_TI_LEVEL0",
+    "T76x_TI_LEVEL1",
+    "T76x_TI_LEVEL2",
+    "T76x_TI_LEVEL3",
+    "T76x_TI_LEVEL4",
+    "T76x_TI_LEVEL5",
+    "T76x_TI_LEVEL6",
+    "T76x_TI_LEVEL7",
+    "T76x_TI_COMMAND_1",
+    "T76x_TI_COMMAND_2",
+    "T76x_TI_COMMAND_3",
+    "T76x_TI_COMMAND_4",
+    "T76x_TI_COMMAND_5_7",
+    "T76x_TI_COMMAND_8_15",
+    "T76x_TI_COMMAND_16_63",
+    "T76x_TI_COMMAND_64",
+    "T76x_TI_COMPRESS_IN",
+    "T76x_TI_COMPRESS_OUT",
+    "T76x_TI_COMPRESS_FLUSH",
+    "T76x_TI_TIMESTAMPS",
+    "T76x_TI_PCACHE_HIT",
+    "T76x_TI_PCACHE_MISS",
+    "T76x_TI_PCACHE_LINE",
+    "T76x_TI_PCACHE_STALL",
+    "T76x_TI_WRBUF_HIT",
+    "T76x_TI_WRBUF_MISS",
+    "T76x_TI_WRBUF_LINE",
+    "T76x_TI_WRBUF_PARTIAL",
+    "T76x_TI_WRBUF_STALL",
+    "T76x_TI_ACTIVE",
+    "T76x_TI_LOADING_DESC",
+    "T76x_TI_INDEX_WAIT",
+    "T76x_TI_INDEX_RANGE_WAIT",
+    "T76x_TI_VERTEX_WAIT",
+    "T76x_TI_PCACHE_WAIT",
+    "T76x_TI_WRBUF_WAIT",
+    "T76x_TI_BUS_READ",
+    "T76x_TI_BUS_WRITE",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "T76x_TI_UTLB_HIT",
+    "T76x_TI_UTLB_NEW_MISS",
+    "T76x_TI_UTLB_REPLAY_FULL",
+    "T76x_TI_UTLB_REPLAY_MISS",
+    "T76x_TI_UTLB_STALL",
 
-        /* Shader Core */
-        "",
-        "",
-        "",
-        "",
-        "T76x_FRAG_ACTIVE",
-        "T76x_FRAG_PRIMITIVES",
-        "T76x_FRAG_PRIMITIVES_DROPPED",
-        "T76x_FRAG_CYCLES_DESC",
-        "T76x_FRAG_CYCLES_FPKQ_ACTIVE",
-        "T76x_FRAG_CYCLES_VERT",
-        "T76x_FRAG_CYCLES_TRISETUP",
-        "T76x_FRAG_CYCLES_EZS_ACTIVE",
-        "T76x_FRAG_THREADS",
-        "T76x_FRAG_DUMMY_THREADS",
-        "T76x_FRAG_QUADS_RAST",
-        "T76x_FRAG_QUADS_EZS_TEST",
-        "T76x_FRAG_QUADS_EZS_KILLED",
-        "T76x_FRAG_THREADS_LZS_TEST",
-        "T76x_FRAG_THREADS_LZS_KILLED",
-        "T76x_FRAG_CYCLES_NO_TILE",
-        "T76x_FRAG_NUM_TILES",
-        "T76x_FRAG_TRANS_ELIM",
-        "T76x_COMPUTE_ACTIVE",
-        "T76x_COMPUTE_TASKS",
-        "T76x_COMPUTE_THREADS",
-        "T76x_COMPUTE_CYCLES_DESC",
-        "T76x_TRIPIPE_ACTIVE",
-        "T76x_ARITH_WORDS",
-        "T76x_ARITH_CYCLES_REG",
-        "T76x_ARITH_CYCLES_L0",
-        "T76x_ARITH_FRAG_DEPEND",
-        "T76x_LS_WORDS",
-        "T76x_LS_ISSUES",
-        "T76x_LS_REISSUE_ATTR",
-        "T76x_LS_REISSUES_VARY",
-        "T76x_LS_VARY_RV_MISS",
-        "T76x_LS_VARY_RV_HIT",
-        "T76x_LS_NO_UNPARK",
-        "T76x_TEX_WORDS",
-        "T76x_TEX_BUBBLES",
-        "T76x_TEX_WORDS_L0",
-        "T76x_TEX_WORDS_DESC",
-        "T76x_TEX_ISSUES",
-        "T76x_TEX_RECIRC_FMISS",
-        "T76x_TEX_RECIRC_DESC",
-        "T76x_TEX_RECIRC_MULTI",
-        "T76x_TEX_RECIRC_PMISS",
-        "T76x_TEX_RECIRC_CONF",
-        "T76x_LSC_READ_HITS",
-        "T76x_LSC_READ_OP",
-        "T76x_LSC_WRITE_HITS",
-        "T76x_LSC_WRITE_OP",
-        "T76x_LSC_ATOMIC_HITS",
-        "T76x_LSC_ATOMIC_OP",
-        "T76x_LSC_LINE_FETCHES",
-        "T76x_LSC_DIRTY_LINE",
-        "T76x_LSC_SNOOPS",
-        "T76x_AXI_TLB_STALL",
-        "T76x_AXI_TLB_MISS",
-        "T76x_AXI_TLB_TRANSACTION",
-        "T76x_LS_TLB_MISS",
-        "T76x_LS_TLB_HIT",
-        "T76x_AXI_BEATS_READ",
-        "T76x_AXI_BEATS_WRITTEN",
+    /* Shader Core */
+    "",
+    "",
+    "",
+    "",
+    "T76x_FRAG_ACTIVE",
+    "T76x_FRAG_PRIMITIVES",
+    "T76x_FRAG_PRIMITIVES_DROPPED",
+    "T76x_FRAG_CYCLES_DESC",
+    "T76x_FRAG_CYCLES_FPKQ_ACTIVE",
+    "T76x_FRAG_CYCLES_VERT",
+    "T76x_FRAG_CYCLES_TRISETUP",
+    "T76x_FRAG_CYCLES_EZS_ACTIVE",
+    "T76x_FRAG_THREADS",
+    "T76x_FRAG_DUMMY_THREADS",
+    "T76x_FRAG_QUADS_RAST",
+    "T76x_FRAG_QUADS_EZS_TEST",
+    "T76x_FRAG_QUADS_EZS_KILLED",
+    "T76x_FRAG_THREADS_LZS_TEST",
+    "T76x_FRAG_THREADS_LZS_KILLED",
+    "T76x_FRAG_CYCLES_NO_TILE",
+    "T76x_FRAG_NUM_TILES",
+    "T76x_FRAG_TRANS_ELIM",
+    "T76x_COMPUTE_ACTIVE",
+    "T76x_COMPUTE_TASKS",
+    "T76x_COMPUTE_THREADS",
+    "T76x_COMPUTE_CYCLES_DESC",
+    "T76x_TRIPIPE_ACTIVE",
+    "T76x_ARITH_WORDS",
+    "T76x_ARITH_CYCLES_REG",
+    "T76x_ARITH_CYCLES_L0",
+    "T76x_ARITH_FRAG_DEPEND",
+    "T76x_LS_WORDS",
+    "T76x_LS_ISSUES",
+    "T76x_LS_REISSUE_ATTR",
+    "T76x_LS_REISSUES_VARY",
+    "T76x_LS_VARY_RV_MISS",
+    "T76x_LS_VARY_RV_HIT",
+    "T76x_LS_NO_UNPARK",
+    "T76x_TEX_WORDS",
+    "T76x_TEX_BUBBLES",
+    "T76x_TEX_WORDS_L0",
+    "T76x_TEX_WORDS_DESC",
+    "T76x_TEX_ISSUES",
+    "T76x_TEX_RECIRC_FMISS",
+    "T76x_TEX_RECIRC_DESC",
+    "T76x_TEX_RECIRC_MULTI",
+    "T76x_TEX_RECIRC_PMISS",
+    "T76x_TEX_RECIRC_CONF",
+    "T76x_LSC_READ_HITS",
+    "T76x_LSC_READ_OP",
+    "T76x_LSC_WRITE_HITS",
+    "T76x_LSC_WRITE_OP",
+    "T76x_LSC_ATOMIC_HITS",
+    "T76x_LSC_ATOMIC_OP",
+    "T76x_LSC_LINE_FETCHES",
+    "T76x_LSC_DIRTY_LINE",
+    "T76x_LSC_SNOOPS",
+    "T76x_AXI_TLB_STALL",
+    "T76x_AXI_TLB_MISS",
+    "T76x_AXI_TLB_TRANSACTION",
+    "T76x_LS_TLB_MISS",
+    "T76x_LS_TLB_HIT",
+    "T76x_AXI_BEATS_READ",
+    "T76x_AXI_BEATS_WRITTEN",
 
-        /*L2 and MMU */
-        "",
-        "",
-        "",
-        "",
-        "T76x_MMU_HIT",
-        "T76x_MMU_NEW_MISS",
-        "T76x_MMU_REPLAY_FULL",
-        "T76x_MMU_REPLAY_MISS",
-        "T76x_MMU_TABLE_WALK",
-        "T76x_MMU_REQUESTS",
-        "",
-        "",
-        "T76x_UTLB_HIT",
-        "T76x_UTLB_NEW_MISS",
-        "T76x_UTLB_REPLAY_FULL",
-        "T76x_UTLB_REPLAY_MISS",
-        "T76x_UTLB_STALL",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "T76x_L2_EXT_WRITE_BEATS",
-        "T76x_L2_EXT_READ_BEATS",
-        "T76x_L2_ANY_LOOKUP",
-        "T76x_L2_READ_LOOKUP",
-        "T76x_L2_SREAD_LOOKUP",
-        "T76x_L2_READ_REPLAY",
-        "T76x_L2_READ_SNOOP",
-        "T76x_L2_READ_HIT",
-        "T76x_L2_CLEAN_MISS",
-        "T76x_L2_WRITE_LOOKUP",
-        "T76x_L2_SWRITE_LOOKUP",
-        "T76x_L2_WRITE_REPLAY",
-        "T76x_L2_WRITE_SNOOP",
-        "T76x_L2_WRITE_HIT",
-        "T76x_L2_EXT_READ_FULL",
-        "",
-        "T76x_L2_EXT_WRITE_FULL",
-        "T76x_L2_EXT_R_W_HAZARD",
-        "T76x_L2_EXT_READ",
-        "T76x_L2_EXT_READ_LINE",
-        "T76x_L2_EXT_WRITE",
-        "T76x_L2_EXT_WRITE_LINE",
-        "T76x_L2_EXT_WRITE_SMALL",
-        "T76x_L2_EXT_BARRIER",
-        "T76x_L2_EXT_AR_STALL",
-        "T76x_L2_EXT_R_BUF_FULL",
-        "T76x_L2_EXT_RD_BUF_FULL",
-        "T76x_L2_EXT_R_RAW",
-        "T76x_L2_EXT_W_STALL",
-        "T76x_L2_EXT_W_BUF_FULL",
-        "T76x_L2_EXT_R_BUF_FULL",
-        "T76x_L2_TAG_HAZARD",
-        "T76x_L2_SNOOP_FULL",
-        "T76x_L2_REPLAY_FULL"
-    };
+    /*L2 and MMU */
+    "",
+    "",
+    "",
+    "",
+    "T76x_MMU_HIT",
+    "T76x_MMU_NEW_MISS",
+    "T76x_MMU_REPLAY_FULL",
+    "T76x_MMU_REPLAY_MISS",
+    "T76x_MMU_TABLE_WALK",
+    "T76x_MMU_REQUESTS",
+    "",
+    "",
+    "T76x_UTLB_HIT",
+    "T76x_UTLB_NEW_MISS",
+    "T76x_UTLB_REPLAY_FULL",
+    "T76x_UTLB_REPLAY_MISS",
+    "T76x_UTLB_STALL",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "T76x_L2_EXT_WRITE_BEATS",
+    "T76x_L2_EXT_READ_BEATS",
+    "T76x_L2_ANY_LOOKUP",
+    "T76x_L2_READ_LOOKUP",
+    "T76x_L2_SREAD_LOOKUP",
+    "T76x_L2_READ_REPLAY",
+    "T76x_L2_READ_SNOOP",
+    "T76x_L2_READ_HIT",
+    "T76x_L2_CLEAN_MISS",
+    "T76x_L2_WRITE_LOOKUP",
+    "T76x_L2_SWRITE_LOOKUP",
+    "T76x_L2_WRITE_REPLAY",
+    "T76x_L2_WRITE_SNOOP",
+    "T76x_L2_WRITE_HIT",
+    "T76x_L2_EXT_READ_FULL",
+    "",
+    "T76x_L2_EXT_WRITE_FULL",
+    "T76x_L2_EXT_R_W_HAZARD",
+    "T76x_L2_EXT_READ",
+    "T76x_L2_EXT_READ_LINE",
+    "T76x_L2_EXT_WRITE",
+    "T76x_L2_EXT_WRITE_LINE",
+    "T76x_L2_EXT_WRITE_SMALL",
+    "T76x_L2_EXT_BARRIER",
+    "T76x_L2_EXT_AR_STALL",
+    "T76x_L2_EXT_R_BUF_FULL",
+    "T76x_L2_EXT_RD_BUF_FULL",
+    "T76x_L2_EXT_R_RAW",
+    "T76x_L2_EXT_W_STALL",
+    "T76x_L2_EXT_W_BUF_FULL",
+    "T76x_L2_EXT_R_BUF_FULL",
+    "T76x_L2_TAG_HAZARD",
+    "T76x_L2_SNOOP_FULL",
+    "T76x_L2_REPLAY_FULL"
+};
 
-    static const char * const hardware_counters_mali_t82x[] = {
-        /* Job Manager */
-        "",
-        "",
-        "",
-        "",
-        "T82x_MESSAGES_SENT",
-        "T82x_MESSAGES_RECEIVED",
-        "T82x_GPU_ACTIVE",
-        "T82x_IRQ_ACTIVE",
-        "T82x_JS0_JOBS",
-        "T82x_JS0_TASKS",
-        "T82x_JS0_ACTIVE",
-        "",
-        "T82x_JS0_WAIT_READ",
-        "T82x_JS0_WAIT_ISSUE",
-        "T82x_JS0_WAIT_DEPEND",
-        "T82x_JS0_WAIT_FINISH",
-        "T82x_JS1_JOBS",
-        "T82x_JS1_TASKS",
-        "T82x_JS1_ACTIVE",
-        "",
-        "T82x_JS1_WAIT_READ",
-        "T82x_JS1_WAIT_ISSUE",
-        "T82x_JS1_WAIT_DEPEND",
-        "T82x_JS1_WAIT_FINISH",
-        "T82x_JS2_JOBS",
-        "T82x_JS2_TASKS",
-        "T82x_JS2_ACTIVE",
-        "",
-        "T82x_JS2_WAIT_READ",
-        "T82x_JS2_WAIT_ISSUE",
-        "T82x_JS2_WAIT_DEPEND",
-        "T82x_JS2_WAIT_FINISH",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
+static const char *const hardware_counters_mali_t82x[] =
+{
+    /* Job Manager */
+    "",
+    "",
+    "",
+    "",
+    "T82x_MESSAGES_SENT",
+    "T82x_MESSAGES_RECEIVED",
+    "T82x_GPU_ACTIVE",
+    "T82x_IRQ_ACTIVE",
+    "T82x_JS0_JOBS",
+    "T82x_JS0_TASKS",
+    "T82x_JS0_ACTIVE",
+    "",
+    "T82x_JS0_WAIT_READ",
+    "T82x_JS0_WAIT_ISSUE",
+    "T82x_JS0_WAIT_DEPEND",
+    "T82x_JS0_WAIT_FINISH",
+    "T82x_JS1_JOBS",
+    "T82x_JS1_TASKS",
+    "T82x_JS1_ACTIVE",
+    "",
+    "T82x_JS1_WAIT_READ",
+    "T82x_JS1_WAIT_ISSUE",
+    "T82x_JS1_WAIT_DEPEND",
+    "T82x_JS1_WAIT_FINISH",
+    "T82x_JS2_JOBS",
+    "T82x_JS2_TASKS",
+    "T82x_JS2_ACTIVE",
+    "",
+    "T82x_JS2_WAIT_READ",
+    "T82x_JS2_WAIT_ISSUE",
+    "T82x_JS2_WAIT_DEPEND",
+    "T82x_JS2_WAIT_FINISH",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
 
-        /*Tiler */
-        "",
-        "",
-        "",
-        "T82x_TI_JOBS_PROCESSED",
-        "T82x_TI_TRIANGLES",
-        "T82x_TI_QUADS",
-        "T82x_TI_POLYGONS",
-        "T82x_TI_POINTS",
-        "T82x_TI_LINES",
-        "T82x_TI_FRONT_FACING",
-        "T82x_TI_BACK_FACING",
-        "T82x_TI_PRIM_VISIBLE",
-        "T82x_TI_PRIM_CULLED",
-        "T82x_TI_PRIM_CLIPPED",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "T82x_TI_ACTIVE",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
+    /*Tiler */
+    "",
+    "",
+    "",
+    "T82x_TI_JOBS_PROCESSED",
+    "T82x_TI_TRIANGLES",
+    "T82x_TI_QUADS",
+    "T82x_TI_POLYGONS",
+    "T82x_TI_POINTS",
+    "T82x_TI_LINES",
+    "T82x_TI_FRONT_FACING",
+    "T82x_TI_BACK_FACING",
+    "T82x_TI_PRIM_VISIBLE",
+    "T82x_TI_PRIM_CULLED",
+    "T82x_TI_PRIM_CLIPPED",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "T82x_TI_ACTIVE",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
 
-        /* Shader Core */
-        "",
-        "",
-        "",
-        "",
-        "T82x_FRAG_ACTIVE",
-        "T82x_FRAG_PRIMITIVES",
-        "T82x_FRAG_PRIMITIVES_DROPPED",
-        "T82x_FRAG_CYCLES_DESC",
-        "T82x_FRAG_CYCLES_FPKQ_ACTIVE",
-        "T82x_FRAG_CYCLES_VERT",
-        "T82x_FRAG_CYCLES_TRISETUP",
-        "T82x_FRAG_CYCLES_EZS_ACTIVE",
-        "T82x_FRAG_THREADS",
-        "T82x_FRAG_DUMMY_THREADS",
-        "T82x_FRAG_QUADS_RAST",
-        "T82x_FRAG_QUADS_EZS_TEST",
-        "T82x_FRAG_QUADS_EZS_KILLED",
-        "T82x_FRAG_THREADS_LZS_TEST",
-        "T82x_FRAG_THREADS_LZS_KILLED",
-        "T82x_FRAG_CYCLES_NO_TILE",
-        "T82x_FRAG_NUM_TILES",
-        "T82x_FRAG_TRANS_ELIM",
-        "T82x_COMPUTE_ACTIVE",
-        "T82x_COMPUTE_TASKS",
-        "T82x_COMPUTE_THREADS",
-        "T82x_COMPUTE_CYCLES_DESC",
-        "T82x_TRIPIPE_ACTIVE",
-        "T82x_ARITH_WORDS",
-        "T82x_ARITH_CYCLES_REG",
-        "T82x_ARITH_CYCLES_L0",
-        "T82x_ARITH_FRAG_DEPEND",
-        "T82x_LS_WORDS",
-        "T82x_LS_ISSUES",
-        "T82x_LS_REISSUE_ATTR",
-        "T82x_LS_REISSUES_VARY",
-        "T82x_LS_VARY_RV_MISS",
-        "T82x_LS_VARY_RV_HIT",
-        "T82x_LS_NO_UNPARK",
-        "T82x_TEX_WORDS",
-        "T82x_TEX_BUBBLES",
-        "T82x_TEX_WORDS_L0",
-        "T82x_TEX_WORDS_DESC",
-        "T82x_TEX_ISSUES",
-        "T82x_TEX_RECIRC_FMISS",
-        "T82x_TEX_RECIRC_DESC",
-        "T82x_TEX_RECIRC_MULTI",
-        "T82x_TEX_RECIRC_PMISS",
-        "T82x_TEX_RECIRC_CONF",
-        "T82x_LSC_READ_HITS",
-        "T82x_LSC_READ_OP",
-        "T82x_LSC_WRITE_HITS",
-        "T82x_LSC_WRITE_OP",
-        "T82x_LSC_ATOMIC_HITS",
-        "T82x_LSC_ATOMIC_OP",
-        "T82x_LSC_LINE_FETCHES",
-        "T82x_LSC_DIRTY_LINE",
-        "T82x_LSC_SNOOPS",
-        "T82x_AXI_TLB_STALL",
-        "T82x_AXI_TLB_MISS",
-        "T82x_AXI_TLB_TRANSACTION",
-        "T82x_LS_TLB_MISS",
-        "T82x_LS_TLB_HIT",
-        "T82x_AXI_BEATS_READ",
-        "T82x_AXI_BEATS_WRITTEN",
+    /* Shader Core */
+    "",
+    "",
+    "",
+    "",
+    "T82x_FRAG_ACTIVE",
+    "T82x_FRAG_PRIMITIVES",
+    "T82x_FRAG_PRIMITIVES_DROPPED",
+    "T82x_FRAG_CYCLES_DESC",
+    "T82x_FRAG_CYCLES_FPKQ_ACTIVE",
+    "T82x_FRAG_CYCLES_VERT",
+    "T82x_FRAG_CYCLES_TRISETUP",
+    "T82x_FRAG_CYCLES_EZS_ACTIVE",
+    "T82x_FRAG_THREADS",
+    "T82x_FRAG_DUMMY_THREADS",
+    "T82x_FRAG_QUADS_RAST",
+    "T82x_FRAG_QUADS_EZS_TEST",
+    "T82x_FRAG_QUADS_EZS_KILLED",
+    "T82x_FRAG_THREADS_LZS_TEST",
+    "T82x_FRAG_THREADS_LZS_KILLED",
+    "T82x_FRAG_CYCLES_NO_TILE",
+    "T82x_FRAG_NUM_TILES",
+    "T82x_FRAG_TRANS_ELIM",
+    "T82x_COMPUTE_ACTIVE",
+    "T82x_COMPUTE_TASKS",
+    "T82x_COMPUTE_THREADS",
+    "T82x_COMPUTE_CYCLES_DESC",
+    "T82x_TRIPIPE_ACTIVE",
+    "T82x_ARITH_WORDS",
+    "T82x_ARITH_CYCLES_REG",
+    "T82x_ARITH_CYCLES_L0",
+    "T82x_ARITH_FRAG_DEPEND",
+    "T82x_LS_WORDS",
+    "T82x_LS_ISSUES",
+    "T82x_LS_REISSUE_ATTR",
+    "T82x_LS_REISSUES_VARY",
+    "T82x_LS_VARY_RV_MISS",
+    "T82x_LS_VARY_RV_HIT",
+    "T82x_LS_NO_UNPARK",
+    "T82x_TEX_WORDS",
+    "T82x_TEX_BUBBLES",
+    "T82x_TEX_WORDS_L0",
+    "T82x_TEX_WORDS_DESC",
+    "T82x_TEX_ISSUES",
+    "T82x_TEX_RECIRC_FMISS",
+    "T82x_TEX_RECIRC_DESC",
+    "T82x_TEX_RECIRC_MULTI",
+    "T82x_TEX_RECIRC_PMISS",
+    "T82x_TEX_RECIRC_CONF",
+    "T82x_LSC_READ_HITS",
+    "T82x_LSC_READ_OP",
+    "T82x_LSC_WRITE_HITS",
+    "T82x_LSC_WRITE_OP",
+    "T82x_LSC_ATOMIC_HITS",
+    "T82x_LSC_ATOMIC_OP",
+    "T82x_LSC_LINE_FETCHES",
+    "T82x_LSC_DIRTY_LINE",
+    "T82x_LSC_SNOOPS",
+    "T82x_AXI_TLB_STALL",
+    "T82x_AXI_TLB_MISS",
+    "T82x_AXI_TLB_TRANSACTION",
+    "T82x_LS_TLB_MISS",
+    "T82x_LS_TLB_HIT",
+    "T82x_AXI_BEATS_READ",
+    "T82x_AXI_BEATS_WRITTEN",
 
-        /*L2 and MMU */
-        "",
-        "",
-        "",
-        "",
-        "T82x_MMU_HIT",
-        "T82x_MMU_NEW_MISS",
-        "T82x_MMU_REPLAY_FULL",
-        "T82x_MMU_REPLAY_MISS",
-        "T82x_MMU_TABLE_WALK",
-        "T82x_MMU_REQUESTS",
-        "",
-        "",
-        "T82x_UTLB_HIT",
-        "T82x_UTLB_NEW_MISS",
-        "T82x_UTLB_REPLAY_FULL",
-        "T82x_UTLB_REPLAY_MISS",
-        "T82x_UTLB_STALL",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "T82x_L2_EXT_WRITE_BEATS",
-        "T82x_L2_EXT_READ_BEATS",
-        "T82x_L2_ANY_LOOKUP",
-        "T82x_L2_READ_LOOKUP",
-        "T82x_L2_SREAD_LOOKUP",
-        "T82x_L2_READ_REPLAY",
-        "T82x_L2_READ_SNOOP",
-        "T82x_L2_READ_HIT",
-        "T82x_L2_CLEAN_MISS",
-        "T82x_L2_WRITE_LOOKUP",
-        "T82x_L2_SWRITE_LOOKUP",
-        "T82x_L2_WRITE_REPLAY",
-        "T82x_L2_WRITE_SNOOP",
-        "T82x_L2_WRITE_HIT",
-        "T82x_L2_EXT_READ_FULL",
-        "",
-        "T82x_L2_EXT_WRITE_FULL",
-        "T82x_L2_EXT_R_W_HAZARD",
-        "T82x_L2_EXT_READ",
-        "T82x_L2_EXT_READ_LINE",
-        "T82x_L2_EXT_WRITE",
-        "T82x_L2_EXT_WRITE_LINE",
-        "T82x_L2_EXT_WRITE_SMALL",
-        "T82x_L2_EXT_BARRIER",
-        "T82x_L2_EXT_AR_STALL",
-        "T82x_L2_EXT_R_BUF_FULL",
-        "T82x_L2_EXT_RD_BUF_FULL",
-        "T82x_L2_EXT_R_RAW",
-        "T82x_L2_EXT_W_STALL",
-        "T82x_L2_EXT_W_BUF_FULL",
-        "T82x_L2_EXT_R_BUF_FULL",
-        "T82x_L2_TAG_HAZARD",
-        "T82x_L2_SNOOP_FULL",
-        "T82x_L2_REPLAY_FULL"
-    };
+    /*L2 and MMU */
+    "",
+    "",
+    "",
+    "",
+    "T82x_MMU_HIT",
+    "T82x_MMU_NEW_MISS",
+    "T82x_MMU_REPLAY_FULL",
+    "T82x_MMU_REPLAY_MISS",
+    "T82x_MMU_TABLE_WALK",
+    "T82x_MMU_REQUESTS",
+    "",
+    "",
+    "T82x_UTLB_HIT",
+    "T82x_UTLB_NEW_MISS",
+    "T82x_UTLB_REPLAY_FULL",
+    "T82x_UTLB_REPLAY_MISS",
+    "T82x_UTLB_STALL",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "T82x_L2_EXT_WRITE_BEATS",
+    "T82x_L2_EXT_READ_BEATS",
+    "T82x_L2_ANY_LOOKUP",
+    "T82x_L2_READ_LOOKUP",
+    "T82x_L2_SREAD_LOOKUP",
+    "T82x_L2_READ_REPLAY",
+    "T82x_L2_READ_SNOOP",
+    "T82x_L2_READ_HIT",
+    "T82x_L2_CLEAN_MISS",
+    "T82x_L2_WRITE_LOOKUP",
+    "T82x_L2_SWRITE_LOOKUP",
+    "T82x_L2_WRITE_REPLAY",
+    "T82x_L2_WRITE_SNOOP",
+    "T82x_L2_WRITE_HIT",
+    "T82x_L2_EXT_READ_FULL",
+    "",
+    "T82x_L2_EXT_WRITE_FULL",
+    "T82x_L2_EXT_R_W_HAZARD",
+    "T82x_L2_EXT_READ",
+    "T82x_L2_EXT_READ_LINE",
+    "T82x_L2_EXT_WRITE",
+    "T82x_L2_EXT_WRITE_LINE",
+    "T82x_L2_EXT_WRITE_SMALL",
+    "T82x_L2_EXT_BARRIER",
+    "T82x_L2_EXT_AR_STALL",
+    "T82x_L2_EXT_R_BUF_FULL",
+    "T82x_L2_EXT_RD_BUF_FULL",
+    "T82x_L2_EXT_R_RAW",
+    "T82x_L2_EXT_W_STALL",
+    "T82x_L2_EXT_W_BUF_FULL",
+    "T82x_L2_EXT_R_BUF_FULL",
+    "T82x_L2_TAG_HAZARD",
+    "T82x_L2_SNOOP_FULL",
+    "T82x_L2_REPLAY_FULL"
+};
 
-    static const char * const hardware_counters_mali_t83x[] = {
-        /* Job Manager */
-        "",
-        "",
-        "",
-        "",
-        "T83x_MESSAGES_SENT",
-        "T83x_MESSAGES_RECEIVED",
-        "T83x_GPU_ACTIVE",
-        "T83x_IRQ_ACTIVE",
-        "T83x_JS0_JOBS",
-        "T83x_JS0_TASKS",
-        "T83x_JS0_ACTIVE",
-        "",
-        "T83x_JS0_WAIT_READ",
-        "T83x_JS0_WAIT_ISSUE",
-        "T83x_JS0_WAIT_DEPEND",
-        "T83x_JS0_WAIT_FINISH",
-        "T83x_JS1_JOBS",
-        "T83x_JS1_TASKS",
-        "T83x_JS1_ACTIVE",
-        "",
-        "T83x_JS1_WAIT_READ",
-        "T83x_JS1_WAIT_ISSUE",
-        "T83x_JS1_WAIT_DEPEND",
-        "T83x_JS1_WAIT_FINISH",
-        "T83x_JS2_JOBS",
-        "T83x_JS2_TASKS",
-        "T83x_JS2_ACTIVE",
-        "",
-        "T83x_JS2_WAIT_READ",
-        "T83x_JS2_WAIT_ISSUE",
-        "T83x_JS2_WAIT_DEPEND",
-        "T83x_JS2_WAIT_FINISH",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
+static const char *const hardware_counters_mali_t83x[] =
+{
+    /* Job Manager */
+    "",
+    "",
+    "",
+    "",
+    "T83x_MESSAGES_SENT",
+    "T83x_MESSAGES_RECEIVED",
+    "T83x_GPU_ACTIVE",
+    "T83x_IRQ_ACTIVE",
+    "T83x_JS0_JOBS",
+    "T83x_JS0_TASKS",
+    "T83x_JS0_ACTIVE",
+    "",
+    "T83x_JS0_WAIT_READ",
+    "T83x_JS0_WAIT_ISSUE",
+    "T83x_JS0_WAIT_DEPEND",
+    "T83x_JS0_WAIT_FINISH",
+    "T83x_JS1_JOBS",
+    "T83x_JS1_TASKS",
+    "T83x_JS1_ACTIVE",
+    "",
+    "T83x_JS1_WAIT_READ",
+    "T83x_JS1_WAIT_ISSUE",
+    "T83x_JS1_WAIT_DEPEND",
+    "T83x_JS1_WAIT_FINISH",
+    "T83x_JS2_JOBS",
+    "T83x_JS2_TASKS",
+    "T83x_JS2_ACTIVE",
+    "",
+    "T83x_JS2_WAIT_READ",
+    "T83x_JS2_WAIT_ISSUE",
+    "T83x_JS2_WAIT_DEPEND",
+    "T83x_JS2_WAIT_FINISH",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
 
-        /*Tiler */
-        "",
-        "",
-        "",
-        "T83x_TI_JOBS_PROCESSED",
-        "T83x_TI_TRIANGLES",
-        "T83x_TI_QUADS",
-        "T83x_TI_POLYGONS",
-        "T83x_TI_POINTS",
-        "T83x_TI_LINES",
-        "T83x_TI_FRONT_FACING",
-        "T83x_TI_BACK_FACING",
-        "T83x_TI_PRIM_VISIBLE",
-        "T83x_TI_PRIM_CULLED",
-        "T83x_TI_PRIM_CLIPPED",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "T83x_TI_ACTIVE",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
+    /*Tiler */
+    "",
+    "",
+    "",
+    "T83x_TI_JOBS_PROCESSED",
+    "T83x_TI_TRIANGLES",
+    "T83x_TI_QUADS",
+    "T83x_TI_POLYGONS",
+    "T83x_TI_POINTS",
+    "T83x_TI_LINES",
+    "T83x_TI_FRONT_FACING",
+    "T83x_TI_BACK_FACING",
+    "T83x_TI_PRIM_VISIBLE",
+    "T83x_TI_PRIM_CULLED",
+    "T83x_TI_PRIM_CLIPPED",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "T83x_TI_ACTIVE",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
 
-        /* Shader Core */
-        "",
-        "",
-        "",
-        "",
-        "T83x_FRAG_ACTIVE",
-        "T83x_FRAG_PRIMITIVES",
-        "T83x_FRAG_PRIMITIVES_DROPPED",
-        "T83x_FRAG_CYCLES_DESC",
-        "T83x_FRAG_CYCLES_FPKQ_ACTIVE",
-        "T83x_FRAG_CYCLES_VERT",
-        "T83x_FRAG_CYCLES_TRISETUP",
-        "T83x_FRAG_CYCLES_EZS_ACTIVE",
-        "T83x_FRAG_THREADS",
-        "T83x_FRAG_DUMMY_THREADS",
-        "T83x_FRAG_QUADS_RAST",
-        "T83x_FRAG_QUADS_EZS_TEST",
-        "T83x_FRAG_QUADS_EZS_KILLED",
-        "T83x_FRAG_THREADS_LZS_TEST",
-        "T83x_FRAG_THREADS_LZS_KILLED",
-        "T83x_FRAG_CYCLES_NO_TILE",
-        "T83x_FRAG_NUM_TILES",
-        "T83x_FRAG_TRANS_ELIM",
-        "T83x_COMPUTE_ACTIVE",
-        "T83x_COMPUTE_TASKS",
-        "T83x_COMPUTE_THREADS",
-        "T83x_COMPUTE_CYCLES_DESC",
-        "T83x_TRIPIPE_ACTIVE",
-        "T83x_ARITH_WORDS",
-        "T83x_ARITH_CYCLES_REG",
-        "T83x_ARITH_CYCLES_L0",
-        "T83x_ARITH_FRAG_DEPEND",
-        "T83x_LS_WORDS",
-        "T83x_LS_ISSUES",
-        "T83x_LS_REISSUE_ATTR",
-        "T83x_LS_REISSUES_VARY",
-        "T83x_LS_VARY_RV_MISS",
-        "T83x_LS_VARY_RV_HIT",
-        "T83x_LS_NO_UNPARK",
-        "T83x_TEX_WORDS",
-        "T83x_TEX_BUBBLES",
-        "T83x_TEX_WORDS_L0",
-        "T83x_TEX_WORDS_DESC",
-        "T83x_TEX_ISSUES",
-        "T83x_TEX_RECIRC_FMISS",
-        "T83x_TEX_RECIRC_DESC",
-        "T83x_TEX_RECIRC_MULTI",
-        "T83x_TEX_RECIRC_PMISS",
-        "T83x_TEX_RECIRC_CONF",
-        "T83x_LSC_READ_HITS",
-        "T83x_LSC_READ_OP",
-        "T83x_LSC_WRITE_HITS",
-        "T83x_LSC_WRITE_OP",
-        "T83x_LSC_ATOMIC_HITS",
-        "T83x_LSC_ATOMIC_OP",
-        "T83x_LSC_LINE_FETCHES",
-        "T83x_LSC_DIRTY_LINE",
-        "T83x_LSC_SNOOPS",
-        "T83x_AXI_TLB_STALL",
-        "T83x_AXI_TLB_MISS",
-        "T83x_AXI_TLB_TRANSACTION",
-        "T83x_LS_TLB_MISS",
-        "T83x_LS_TLB_HIT",
-        "T83x_AXI_BEATS_READ",
-        "T83x_AXI_BEATS_WRITTEN",
+    /* Shader Core */
+    "",
+    "",
+    "",
+    "",
+    "T83x_FRAG_ACTIVE",
+    "T83x_FRAG_PRIMITIVES",
+    "T83x_FRAG_PRIMITIVES_DROPPED",
+    "T83x_FRAG_CYCLES_DESC",
+    "T83x_FRAG_CYCLES_FPKQ_ACTIVE",
+    "T83x_FRAG_CYCLES_VERT",
+    "T83x_FRAG_CYCLES_TRISETUP",
+    "T83x_FRAG_CYCLES_EZS_ACTIVE",
+    "T83x_FRAG_THREADS",
+    "T83x_FRAG_DUMMY_THREADS",
+    "T83x_FRAG_QUADS_RAST",
+    "T83x_FRAG_QUADS_EZS_TEST",
+    "T83x_FRAG_QUADS_EZS_KILLED",
+    "T83x_FRAG_THREADS_LZS_TEST",
+    "T83x_FRAG_THREADS_LZS_KILLED",
+    "T83x_FRAG_CYCLES_NO_TILE",
+    "T83x_FRAG_NUM_TILES",
+    "T83x_FRAG_TRANS_ELIM",
+    "T83x_COMPUTE_ACTIVE",
+    "T83x_COMPUTE_TASKS",
+    "T83x_COMPUTE_THREADS",
+    "T83x_COMPUTE_CYCLES_DESC",
+    "T83x_TRIPIPE_ACTIVE",
+    "T83x_ARITH_WORDS",
+    "T83x_ARITH_CYCLES_REG",
+    "T83x_ARITH_CYCLES_L0",
+    "T83x_ARITH_FRAG_DEPEND",
+    "T83x_LS_WORDS",
+    "T83x_LS_ISSUES",
+    "T83x_LS_REISSUE_ATTR",
+    "T83x_LS_REISSUES_VARY",
+    "T83x_LS_VARY_RV_MISS",
+    "T83x_LS_VARY_RV_HIT",
+    "T83x_LS_NO_UNPARK",
+    "T83x_TEX_WORDS",
+    "T83x_TEX_BUBBLES",
+    "T83x_TEX_WORDS_L0",
+    "T83x_TEX_WORDS_DESC",
+    "T83x_TEX_ISSUES",
+    "T83x_TEX_RECIRC_FMISS",
+    "T83x_TEX_RECIRC_DESC",
+    "T83x_TEX_RECIRC_MULTI",
+    "T83x_TEX_RECIRC_PMISS",
+    "T83x_TEX_RECIRC_CONF",
+    "T83x_LSC_READ_HITS",
+    "T83x_LSC_READ_OP",
+    "T83x_LSC_WRITE_HITS",
+    "T83x_LSC_WRITE_OP",
+    "T83x_LSC_ATOMIC_HITS",
+    "T83x_LSC_ATOMIC_OP",
+    "T83x_LSC_LINE_FETCHES",
+    "T83x_LSC_DIRTY_LINE",
+    "T83x_LSC_SNOOPS",
+    "T83x_AXI_TLB_STALL",
+    "T83x_AXI_TLB_MISS",
+    "T83x_AXI_TLB_TRANSACTION",
+    "T83x_LS_TLB_MISS",
+    "T83x_LS_TLB_HIT",
+    "T83x_AXI_BEATS_READ",
+    "T83x_AXI_BEATS_WRITTEN",
 
-        /*L2 and MMU */
-        "",
-        "",
-        "",
-        "",
-        "T83x_MMU_HIT",
-        "T83x_MMU_NEW_MISS",
-        "T83x_MMU_REPLAY_FULL",
-        "T83x_MMU_REPLAY_MISS",
-        "T83x_MMU_TABLE_WALK",
-        "T83x_MMU_REQUESTS",
-        "",
-        "",
-        "T83x_UTLB_HIT",
-        "T83x_UTLB_NEW_MISS",
-        "T83x_UTLB_REPLAY_FULL",
-        "T83x_UTLB_REPLAY_MISS",
-        "T83x_UTLB_STALL",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "T83x_L2_EXT_WRITE_BEATS",
-        "T83x_L2_EXT_READ_BEATS",
-        "T83x_L2_ANY_LOOKUP",
-        "T83x_L2_READ_LOOKUP",
-        "T83x_L2_SREAD_LOOKUP",
-        "T83x_L2_READ_REPLAY",
-        "T83x_L2_READ_SNOOP",
-        "T83x_L2_READ_HIT",
-        "T83x_L2_CLEAN_MISS",
-        "T83x_L2_WRITE_LOOKUP",
-        "T83x_L2_SWRITE_LOOKUP",
-        "T83x_L2_WRITE_REPLAY",
-        "T83x_L2_WRITE_SNOOP",
-        "T83x_L2_WRITE_HIT",
-        "T83x_L2_EXT_READ_FULL",
-        "",
-        "T83x_L2_EXT_WRITE_FULL",
-        "T83x_L2_EXT_R_W_HAZARD",
-        "T83x_L2_EXT_READ",
-        "T83x_L2_EXT_READ_LINE",
-        "T83x_L2_EXT_WRITE",
-        "T83x_L2_EXT_WRITE_LINE",
-        "T83x_L2_EXT_WRITE_SMALL",
-        "T83x_L2_EXT_BARRIER",
-        "T83x_L2_EXT_AR_STALL",
-        "T83x_L2_EXT_R_BUF_FULL",
-        "T83x_L2_EXT_RD_BUF_FULL",
-        "T83x_L2_EXT_R_RAW",
-        "T83x_L2_EXT_W_STALL",
-        "T83x_L2_EXT_W_BUF_FULL",
-        "T83x_L2_EXT_R_BUF_FULL",
-        "T83x_L2_TAG_HAZARD",
-        "T83x_L2_SNOOP_FULL",
-        "T83x_L2_REPLAY_FULL"
-    };
+    /*L2 and MMU */
+    "",
+    "",
+    "",
+    "",
+    "T83x_MMU_HIT",
+    "T83x_MMU_NEW_MISS",
+    "T83x_MMU_REPLAY_FULL",
+    "T83x_MMU_REPLAY_MISS",
+    "T83x_MMU_TABLE_WALK",
+    "T83x_MMU_REQUESTS",
+    "",
+    "",
+    "T83x_UTLB_HIT",
+    "T83x_UTLB_NEW_MISS",
+    "T83x_UTLB_REPLAY_FULL",
+    "T83x_UTLB_REPLAY_MISS",
+    "T83x_UTLB_STALL",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "T83x_L2_EXT_WRITE_BEATS",
+    "T83x_L2_EXT_READ_BEATS",
+    "T83x_L2_ANY_LOOKUP",
+    "T83x_L2_READ_LOOKUP",
+    "T83x_L2_SREAD_LOOKUP",
+    "T83x_L2_READ_REPLAY",
+    "T83x_L2_READ_SNOOP",
+    "T83x_L2_READ_HIT",
+    "T83x_L2_CLEAN_MISS",
+    "T83x_L2_WRITE_LOOKUP",
+    "T83x_L2_SWRITE_LOOKUP",
+    "T83x_L2_WRITE_REPLAY",
+    "T83x_L2_WRITE_SNOOP",
+    "T83x_L2_WRITE_HIT",
+    "T83x_L2_EXT_READ_FULL",
+    "",
+    "T83x_L2_EXT_WRITE_FULL",
+    "T83x_L2_EXT_R_W_HAZARD",
+    "T83x_L2_EXT_READ",
+    "T83x_L2_EXT_READ_LINE",
+    "T83x_L2_EXT_WRITE",
+    "T83x_L2_EXT_WRITE_LINE",
+    "T83x_L2_EXT_WRITE_SMALL",
+    "T83x_L2_EXT_BARRIER",
+    "T83x_L2_EXT_AR_STALL",
+    "T83x_L2_EXT_R_BUF_FULL",
+    "T83x_L2_EXT_RD_BUF_FULL",
+    "T83x_L2_EXT_R_RAW",
+    "T83x_L2_EXT_W_STALL",
+    "T83x_L2_EXT_W_BUF_FULL",
+    "T83x_L2_EXT_R_BUF_FULL",
+    "T83x_L2_TAG_HAZARD",
+    "T83x_L2_SNOOP_FULL",
+    "T83x_L2_REPLAY_FULL"
+};
 
-    static const char * const hardware_counters_mali_t86x[] = {
-        /* Job Manager */
-        "",
-        "",
-        "",
-        "",
-        "T86x_MESSAGES_SENT",
-        "T86x_MESSAGES_RECEIVED",
-        "T86x_GPU_ACTIVE",
-        "T86x_IRQ_ACTIVE",
-        "T86x_JS0_JOBS",
-        "T86x_JS0_TASKS",
-        "T86x_JS0_ACTIVE",
-        "",
-        "T86x_JS0_WAIT_READ",
-        "T86x_JS0_WAIT_ISSUE",
-        "T86x_JS0_WAIT_DEPEND",
-        "T86x_JS0_WAIT_FINISH",
-        "T86x_JS1_JOBS",
-        "T86x_JS1_TASKS",
-        "T86x_JS1_ACTIVE",
-        "",
-        "T86x_JS1_WAIT_READ",
-        "T86x_JS1_WAIT_ISSUE",
-        "T86x_JS1_WAIT_DEPEND",
-        "T86x_JS1_WAIT_FINISH",
-        "T86x_JS2_JOBS",
-        "T86x_JS2_TASKS",
-        "T86x_JS2_ACTIVE",
-        "",
-        "T86x_JS2_WAIT_READ",
-        "T86x_JS2_WAIT_ISSUE",
-        "T86x_JS2_WAIT_DEPEND",
-        "T86x_JS2_WAIT_FINISH",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
+static const char *const hardware_counters_mali_t86x[] =
+{
+    /* Job Manager */
+    "",
+    "",
+    "",
+    "",
+    "T86x_MESSAGES_SENT",
+    "T86x_MESSAGES_RECEIVED",
+    "T86x_GPU_ACTIVE",
+    "T86x_IRQ_ACTIVE",
+    "T86x_JS0_JOBS",
+    "T86x_JS0_TASKS",
+    "T86x_JS0_ACTIVE",
+    "",
+    "T86x_JS0_WAIT_READ",
+    "T86x_JS0_WAIT_ISSUE",
+    "T86x_JS0_WAIT_DEPEND",
+    "T86x_JS0_WAIT_FINISH",
+    "T86x_JS1_JOBS",
+    "T86x_JS1_TASKS",
+    "T86x_JS1_ACTIVE",
+    "",
+    "T86x_JS1_WAIT_READ",
+    "T86x_JS1_WAIT_ISSUE",
+    "T86x_JS1_WAIT_DEPEND",
+    "T86x_JS1_WAIT_FINISH",
+    "T86x_JS2_JOBS",
+    "T86x_JS2_TASKS",
+    "T86x_JS2_ACTIVE",
+    "",
+    "T86x_JS2_WAIT_READ",
+    "T86x_JS2_WAIT_ISSUE",
+    "T86x_JS2_WAIT_DEPEND",
+    "T86x_JS2_WAIT_FINISH",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
 
-        /*Tiler */
-        "",
-        "",
-        "",
-        "T86x_TI_JOBS_PROCESSED",
-        "T86x_TI_TRIANGLES",
-        "T86x_TI_QUADS",
-        "T86x_TI_POLYGONS",
-        "T86x_TI_POINTS",
-        "T86x_TI_LINES",
-        "T86x_TI_VCACHE_HIT",
-        "T86x_TI_VCACHE_MISS",
-        "T86x_TI_FRONT_FACING",
-        "T86x_TI_BACK_FACING",
-        "T86x_TI_PRIM_VISIBLE",
-        "T86x_TI_PRIM_CULLED",
-        "T86x_TI_PRIM_CLIPPED",
-        "T86x_TI_LEVEL0",
-        "T86x_TI_LEVEL1",
-        "T86x_TI_LEVEL2",
-        "T86x_TI_LEVEL3",
-        "T86x_TI_LEVEL4",
-        "T86x_TI_LEVEL5",
-        "T86x_TI_LEVEL6",
-        "T86x_TI_LEVEL7",
-        "T86x_TI_COMMAND_1",
-        "T86x_TI_COMMAND_2",
-        "T86x_TI_COMMAND_3",
-        "T86x_TI_COMMAND_4",
-        "T86x_TI_COMMAND_5_7",
-        "T86x_TI_COMMAND_8_15",
-        "T86x_TI_COMMAND_16_63",
-        "T86x_TI_COMMAND_64",
-        "T86x_TI_COMPRESS_IN",
-        "T86x_TI_COMPRESS_OUT",
-        "T86x_TI_COMPRESS_FLUSH",
-        "T86x_TI_TIMESTAMPS",
-        "T86x_TI_PCACHE_HIT",
-        "T86x_TI_PCACHE_MISS",
-        "T86x_TI_PCACHE_LINE",
-        "T86x_TI_PCACHE_STALL",
-        "T86x_TI_WRBUF_HIT",
-        "T86x_TI_WRBUF_MISS",
-        "T86x_TI_WRBUF_LINE",
-        "T86x_TI_WRBUF_PARTIAL",
-        "T86x_TI_WRBUF_STALL",
-        "T86x_TI_ACTIVE",
-        "T86x_TI_LOADING_DESC",
-        "T86x_TI_INDEX_WAIT",
-        "T86x_TI_INDEX_RANGE_WAIT",
-        "T86x_TI_VERTEX_WAIT",
-        "T86x_TI_PCACHE_WAIT",
-        "T86x_TI_WRBUF_WAIT",
-        "T86x_TI_BUS_READ",
-        "T86x_TI_BUS_WRITE",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "T86x_TI_UTLB_HIT",
-        "T86x_TI_UTLB_NEW_MISS",
-        "T86x_TI_UTLB_REPLAY_FULL",
-        "T86x_TI_UTLB_REPLAY_MISS",
-        "T86x_TI_UTLB_STALL",
+    /*Tiler */
+    "",
+    "",
+    "",
+    "T86x_TI_JOBS_PROCESSED",
+    "T86x_TI_TRIANGLES",
+    "T86x_TI_QUADS",
+    "T86x_TI_POLYGONS",
+    "T86x_TI_POINTS",
+    "T86x_TI_LINES",
+    "T86x_TI_VCACHE_HIT",
+    "T86x_TI_VCACHE_MISS",
+    "T86x_TI_FRONT_FACING",
+    "T86x_TI_BACK_FACING",
+    "T86x_TI_PRIM_VISIBLE",
+    "T86x_TI_PRIM_CULLED",
+    "T86x_TI_PRIM_CLIPPED",
+    "T86x_TI_LEVEL0",
+    "T86x_TI_LEVEL1",
+    "T86x_TI_LEVEL2",
+    "T86x_TI_LEVEL3",
+    "T86x_TI_LEVEL4",
+    "T86x_TI_LEVEL5",
+    "T86x_TI_LEVEL6",
+    "T86x_TI_LEVEL7",
+    "T86x_TI_COMMAND_1",
+    "T86x_TI_COMMAND_2",
+    "T86x_TI_COMMAND_3",
+    "T86x_TI_COMMAND_4",
+    "T86x_TI_COMMAND_5_7",
+    "T86x_TI_COMMAND_8_15",
+    "T86x_TI_COMMAND_16_63",
+    "T86x_TI_COMMAND_64",
+    "T86x_TI_COMPRESS_IN",
+    "T86x_TI_COMPRESS_OUT",
+    "T86x_TI_COMPRESS_FLUSH",
+    "T86x_TI_TIMESTAMPS",
+    "T86x_TI_PCACHE_HIT",
+    "T86x_TI_PCACHE_MISS",
+    "T86x_TI_PCACHE_LINE",
+    "T86x_TI_PCACHE_STALL",
+    "T86x_TI_WRBUF_HIT",
+    "T86x_TI_WRBUF_MISS",
+    "T86x_TI_WRBUF_LINE",
+    "T86x_TI_WRBUF_PARTIAL",
+    "T86x_TI_WRBUF_STALL",
+    "T86x_TI_ACTIVE",
+    "T86x_TI_LOADING_DESC",
+    "T86x_TI_INDEX_WAIT",
+    "T86x_TI_INDEX_RANGE_WAIT",
+    "T86x_TI_VERTEX_WAIT",
+    "T86x_TI_PCACHE_WAIT",
+    "T86x_TI_WRBUF_WAIT",
+    "T86x_TI_BUS_READ",
+    "T86x_TI_BUS_WRITE",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "T86x_TI_UTLB_HIT",
+    "T86x_TI_UTLB_NEW_MISS",
+    "T86x_TI_UTLB_REPLAY_FULL",
+    "T86x_TI_UTLB_REPLAY_MISS",
+    "T86x_TI_UTLB_STALL",
 
-        /* Shader Core */
-        "",
-        "",
-        "",
-        "",
-        "T86x_FRAG_ACTIVE",
-        "T86x_FRAG_PRIMITIVES",
-        "T86x_FRAG_PRIMITIVES_DROPPED",
-        "T86x_FRAG_CYCLES_DESC",
-        "T86x_FRAG_CYCLES_FPKQ_ACTIVE",
-        "T86x_FRAG_CYCLES_VERT",
-        "T86x_FRAG_CYCLES_TRISETUP",
-        "T86x_FRAG_CYCLES_EZS_ACTIVE",
-        "T86x_FRAG_THREADS",
-        "T86x_FRAG_DUMMY_THREADS",
-        "T86x_FRAG_QUADS_RAST",
-        "T86x_FRAG_QUADS_EZS_TEST",
-        "T86x_FRAG_QUADS_EZS_KILLED",
-        "T86x_FRAG_THREADS_LZS_TEST",
-        "T86x_FRAG_THREADS_LZS_KILLED",
-        "T86x_FRAG_CYCLES_NO_TILE",
-        "T86x_FRAG_NUM_TILES",
-        "T86x_FRAG_TRANS_ELIM",
-        "T86x_COMPUTE_ACTIVE",
-        "T86x_COMPUTE_TASKS",
-        "T86x_COMPUTE_THREADS",
-        "T86x_COMPUTE_CYCLES_DESC",
-        "T86x_TRIPIPE_ACTIVE",
-        "T86x_ARITH_WORDS",
-        "T86x_ARITH_CYCLES_REG",
-        "T86x_ARITH_CYCLES_L0",
-        "T86x_ARITH_FRAG_DEPEND",
-        "T86x_LS_WORDS",
-        "T86x_LS_ISSUES",
-        "T86x_LS_REISSUE_ATTR",
-        "T86x_LS_REISSUES_VARY",
-        "T86x_LS_VARY_RV_MISS",
-        "T86x_LS_VARY_RV_HIT",
-        "T86x_LS_NO_UNPARK",
-        "T86x_TEX_WORDS",
-        "T86x_TEX_BUBBLES",
-        "T86x_TEX_WORDS_L0",
-        "T86x_TEX_WORDS_DESC",
-        "T86x_TEX_ISSUES",
-        "T86x_TEX_RECIRC_FMISS",
-        "T86x_TEX_RECIRC_DESC",
-        "T86x_TEX_RECIRC_MULTI",
-        "T86x_TEX_RECIRC_PMISS",
-        "T86x_TEX_RECIRC_CONF",
-        "T86x_LSC_READ_HITS",
-        "T86x_LSC_READ_OP",
-        "T86x_LSC_WRITE_HITS",
-        "T86x_LSC_WRITE_OP",
-        "T86x_LSC_ATOMIC_HITS",
-        "T86x_LSC_ATOMIC_OP",
-        "T86x_LSC_LINE_FETCHES",
-        "T86x_LSC_DIRTY_LINE",
-        "T86x_LSC_SNOOPS",
-        "T86x_AXI_TLB_STALL",
-        "T86x_AXI_TLB_MISS",
-        "T86x_AXI_TLB_TRANSACTION",
-        "T86x_LS_TLB_MISS",
-        "T86x_LS_TLB_HIT",
-        "T86x_AXI_BEATS_READ",
-        "T86x_AXI_BEATS_WRITTEN",
+    /* Shader Core */
+    "",
+    "",
+    "",
+    "",
+    "T86x_FRAG_ACTIVE",
+    "T86x_FRAG_PRIMITIVES",
+    "T86x_FRAG_PRIMITIVES_DROPPED",
+    "T86x_FRAG_CYCLES_DESC",
+    "T86x_FRAG_CYCLES_FPKQ_ACTIVE",
+    "T86x_FRAG_CYCLES_VERT",
+    "T86x_FRAG_CYCLES_TRISETUP",
+    "T86x_FRAG_CYCLES_EZS_ACTIVE",
+    "T86x_FRAG_THREADS",
+    "T86x_FRAG_DUMMY_THREADS",
+    "T86x_FRAG_QUADS_RAST",
+    "T86x_FRAG_QUADS_EZS_TEST",
+    "T86x_FRAG_QUADS_EZS_KILLED",
+    "T86x_FRAG_THREADS_LZS_TEST",
+    "T86x_FRAG_THREADS_LZS_KILLED",
+    "T86x_FRAG_CYCLES_NO_TILE",
+    "T86x_FRAG_NUM_TILES",
+    "T86x_FRAG_TRANS_ELIM",
+    "T86x_COMPUTE_ACTIVE",
+    "T86x_COMPUTE_TASKS",
+    "T86x_COMPUTE_THREADS",
+    "T86x_COMPUTE_CYCLES_DESC",
+    "T86x_TRIPIPE_ACTIVE",
+    "T86x_ARITH_WORDS",
+    "T86x_ARITH_CYCLES_REG",
+    "T86x_ARITH_CYCLES_L0",
+    "T86x_ARITH_FRAG_DEPEND",
+    "T86x_LS_WORDS",
+    "T86x_LS_ISSUES",
+    "T86x_LS_REISSUE_ATTR",
+    "T86x_LS_REISSUES_VARY",
+    "T86x_LS_VARY_RV_MISS",
+    "T86x_LS_VARY_RV_HIT",
+    "T86x_LS_NO_UNPARK",
+    "T86x_TEX_WORDS",
+    "T86x_TEX_BUBBLES",
+    "T86x_TEX_WORDS_L0",
+    "T86x_TEX_WORDS_DESC",
+    "T86x_TEX_ISSUES",
+    "T86x_TEX_RECIRC_FMISS",
+    "T86x_TEX_RECIRC_DESC",
+    "T86x_TEX_RECIRC_MULTI",
+    "T86x_TEX_RECIRC_PMISS",
+    "T86x_TEX_RECIRC_CONF",
+    "T86x_LSC_READ_HITS",
+    "T86x_LSC_READ_OP",
+    "T86x_LSC_WRITE_HITS",
+    "T86x_LSC_WRITE_OP",
+    "T86x_LSC_ATOMIC_HITS",
+    "T86x_LSC_ATOMIC_OP",
+    "T86x_LSC_LINE_FETCHES",
+    "T86x_LSC_DIRTY_LINE",
+    "T86x_LSC_SNOOPS",
+    "T86x_AXI_TLB_STALL",
+    "T86x_AXI_TLB_MISS",
+    "T86x_AXI_TLB_TRANSACTION",
+    "T86x_LS_TLB_MISS",
+    "T86x_LS_TLB_HIT",
+    "T86x_AXI_BEATS_READ",
+    "T86x_AXI_BEATS_WRITTEN",
 
-        /*L2 and MMU */
-        "",
-        "",
-        "",
-        "",
-        "T86x_MMU_HIT",
-        "T86x_MMU_NEW_MISS",
-        "T86x_MMU_REPLAY_FULL",
-        "T86x_MMU_REPLAY_MISS",
-        "T86x_MMU_TABLE_WALK",
-        "T86x_MMU_REQUESTS",
-        "",
-        "",
-        "T86x_UTLB_HIT",
-        "T86x_UTLB_NEW_MISS",
-        "T86x_UTLB_REPLAY_FULL",
-        "T86x_UTLB_REPLAY_MISS",
-        "T86x_UTLB_STALL",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "T86x_L2_EXT_WRITE_BEATS",
-        "T86x_L2_EXT_READ_BEATS",
-        "T86x_L2_ANY_LOOKUP",
-        "T86x_L2_READ_LOOKUP",
-        "T86x_L2_SREAD_LOOKUP",
-        "T86x_L2_READ_REPLAY",
-        "T86x_L2_READ_SNOOP",
-        "T86x_L2_READ_HIT",
-        "T86x_L2_CLEAN_MISS",
-        "T86x_L2_WRITE_LOOKUP",
-        "T86x_L2_SWRITE_LOOKUP",
-        "T86x_L2_WRITE_REPLAY",
-        "T86x_L2_WRITE_SNOOP",
-        "T86x_L2_WRITE_HIT",
-        "T86x_L2_EXT_READ_FULL",
-        "",
-        "T86x_L2_EXT_WRITE_FULL",
-        "T86x_L2_EXT_R_W_HAZARD",
-        "T86x_L2_EXT_READ",
-        "T86x_L2_EXT_READ_LINE",
-        "T86x_L2_EXT_WRITE",
-        "T86x_L2_EXT_WRITE_LINE",
-        "T86x_L2_EXT_WRITE_SMALL",
-        "T86x_L2_EXT_BARRIER",
-        "T86x_L2_EXT_AR_STALL",
-        "T86x_L2_EXT_R_BUF_FULL",
-        "T86x_L2_EXT_RD_BUF_FULL",
-        "T86x_L2_EXT_R_RAW",
-        "T86x_L2_EXT_W_STALL",
-        "T86x_L2_EXT_W_BUF_FULL",
-        "T86x_L2_EXT_R_BUF_FULL",
-        "T86x_L2_TAG_HAZARD",
-        "T86x_L2_SNOOP_FULL",
-        "T86x_L2_REPLAY_FULL"
-    };
+    /*L2 and MMU */
+    "",
+    "",
+    "",
+    "",
+    "T86x_MMU_HIT",
+    "T86x_MMU_NEW_MISS",
+    "T86x_MMU_REPLAY_FULL",
+    "T86x_MMU_REPLAY_MISS",
+    "T86x_MMU_TABLE_WALK",
+    "T86x_MMU_REQUESTS",
+    "",
+    "",
+    "T86x_UTLB_HIT",
+    "T86x_UTLB_NEW_MISS",
+    "T86x_UTLB_REPLAY_FULL",
+    "T86x_UTLB_REPLAY_MISS",
+    "T86x_UTLB_STALL",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "T86x_L2_EXT_WRITE_BEATS",
+    "T86x_L2_EXT_READ_BEATS",
+    "T86x_L2_ANY_LOOKUP",
+    "T86x_L2_READ_LOOKUP",
+    "T86x_L2_SREAD_LOOKUP",
+    "T86x_L2_READ_REPLAY",
+    "T86x_L2_READ_SNOOP",
+    "T86x_L2_READ_HIT",
+    "T86x_L2_CLEAN_MISS",
+    "T86x_L2_WRITE_LOOKUP",
+    "T86x_L2_SWRITE_LOOKUP",
+    "T86x_L2_WRITE_REPLAY",
+    "T86x_L2_WRITE_SNOOP",
+    "T86x_L2_WRITE_HIT",
+    "T86x_L2_EXT_READ_FULL",
+    "",
+    "T86x_L2_EXT_WRITE_FULL",
+    "T86x_L2_EXT_R_W_HAZARD",
+    "T86x_L2_EXT_READ",
+    "T86x_L2_EXT_READ_LINE",
+    "T86x_L2_EXT_WRITE",
+    "T86x_L2_EXT_WRITE_LINE",
+    "T86x_L2_EXT_WRITE_SMALL",
+    "T86x_L2_EXT_BARRIER",
+    "T86x_L2_EXT_AR_STALL",
+    "T86x_L2_EXT_R_BUF_FULL",
+    "T86x_L2_EXT_RD_BUF_FULL",
+    "T86x_L2_EXT_R_RAW",
+    "T86x_L2_EXT_W_STALL",
+    "T86x_L2_EXT_W_BUF_FULL",
+    "T86x_L2_EXT_R_BUF_FULL",
+    "T86x_L2_TAG_HAZARD",
+    "T86x_L2_SNOOP_FULL",
+    "T86x_L2_REPLAY_FULL"
+};
 
-    static const char * const hardware_counters_mali_t88x[] = {
-        /* Job Manager */
-        "",
-        "",
-        "",
-        "",
-        "T88x_MESSAGES_SENT",
-        "T88x_MESSAGES_RECEIVED",
-        "T88x_GPU_ACTIVE",
-        "T88x_IRQ_ACTIVE",
-        "T88x_JS0_JOBS",
-        "T88x_JS0_TASKS",
-        "T88x_JS0_ACTIVE",
-        "",
-        "T88x_JS0_WAIT_READ",
-        "T88x_JS0_WAIT_ISSUE",
-        "T88x_JS0_WAIT_DEPEND",
-        "T88x_JS0_WAIT_FINISH",
-        "T88x_JS1_JOBS",
-        "T88x_JS1_TASKS",
-        "T88x_JS1_ACTIVE",
-        "",
-        "T88x_JS1_WAIT_READ",
-        "T88x_JS1_WAIT_ISSUE",
-        "T88x_JS1_WAIT_DEPEND",
-        "T88x_JS1_WAIT_FINISH",
-        "T88x_JS2_JOBS",
-        "T88x_JS2_TASKS",
-        "T88x_JS2_ACTIVE",
-        "",
-        "T88x_JS2_WAIT_READ",
-        "T88x_JS2_WAIT_ISSUE",
-        "T88x_JS2_WAIT_DEPEND",
-        "T88x_JS2_WAIT_FINISH",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
+static const char *const hardware_counters_mali_t88x[] =
+{
+    /* Job Manager */
+    "",
+    "",
+    "",
+    "",
+    "T88x_MESSAGES_SENT",
+    "T88x_MESSAGES_RECEIVED",
+    "T88x_GPU_ACTIVE",
+    "T88x_IRQ_ACTIVE",
+    "T88x_JS0_JOBS",
+    "T88x_JS0_TASKS",
+    "T88x_JS0_ACTIVE",
+    "",
+    "T88x_JS0_WAIT_READ",
+    "T88x_JS0_WAIT_ISSUE",
+    "T88x_JS0_WAIT_DEPEND",
+    "T88x_JS0_WAIT_FINISH",
+    "T88x_JS1_JOBS",
+    "T88x_JS1_TASKS",
+    "T88x_JS1_ACTIVE",
+    "",
+    "T88x_JS1_WAIT_READ",
+    "T88x_JS1_WAIT_ISSUE",
+    "T88x_JS1_WAIT_DEPEND",
+    "T88x_JS1_WAIT_FINISH",
+    "T88x_JS2_JOBS",
+    "T88x_JS2_TASKS",
+    "T88x_JS2_ACTIVE",
+    "",
+    "T88x_JS2_WAIT_READ",
+    "T88x_JS2_WAIT_ISSUE",
+    "T88x_JS2_WAIT_DEPEND",
+    "T88x_JS2_WAIT_FINISH",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
 
-        /*Tiler */
-        "",
-        "",
-        "",
-        "T88x_TI_JOBS_PROCESSED",
-        "T88x_TI_TRIANGLES",
-        "T88x_TI_QUADS",
-        "T88x_TI_POLYGONS",
-        "T88x_TI_POINTS",
-        "T88x_TI_LINES",
-        "T88x_TI_VCACHE_HIT",
-        "T88x_TI_VCACHE_MISS",
-        "T88x_TI_FRONT_FACING",
-        "T88x_TI_BACK_FACING",
-        "T88x_TI_PRIM_VISIBLE",
-        "T88x_TI_PRIM_CULLED",
-        "T88x_TI_PRIM_CLIPPED",
-        "T88x_TI_LEVEL0",
-        "T88x_TI_LEVEL1",
-        "T88x_TI_LEVEL2",
-        "T88x_TI_LEVEL3",
-        "T88x_TI_LEVEL4",
-        "T88x_TI_LEVEL5",
-        "T88x_TI_LEVEL6",
-        "T88x_TI_LEVEL7",
-        "T88x_TI_COMMAND_1",
-        "T88x_TI_COMMAND_2",
-        "T88x_TI_COMMAND_3",
-        "T88x_TI_COMMAND_4",
-        "T88x_TI_COMMAND_5_7",
-        "T88x_TI_COMMAND_8_15",
-        "T88x_TI_COMMAND_16_63",
-        "T88x_TI_COMMAND_64",
-        "T88x_TI_COMPRESS_IN",
-        "T88x_TI_COMPRESS_OUT",
-        "T88x_TI_COMPRESS_FLUSH",
-        "T88x_TI_TIMESTAMPS",
-        "T88x_TI_PCACHE_HIT",
-        "T88x_TI_PCACHE_MISS",
-        "T88x_TI_PCACHE_LINE",
-        "T88x_TI_PCACHE_STALL",
-        "T88x_TI_WRBUF_HIT",
-        "T88x_TI_WRBUF_MISS",
-        "T88x_TI_WRBUF_LINE",
-        "T88x_TI_WRBUF_PARTIAL",
-        "T88x_TI_WRBUF_STALL",
-        "T88x_TI_ACTIVE",
-        "T88x_TI_LOADING_DESC",
-        "T88x_TI_INDEX_WAIT",
-        "T88x_TI_INDEX_RANGE_WAIT",
-        "T88x_TI_VERTEX_WAIT",
-        "T88x_TI_PCACHE_WAIT",
-        "T88x_TI_WRBUF_WAIT",
-        "T88x_TI_BUS_READ",
-        "T88x_TI_BUS_WRITE",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "T88x_TI_UTLB_HIT",
-        "T88x_TI_UTLB_NEW_MISS",
-        "T88x_TI_UTLB_REPLAY_FULL",
-        "T88x_TI_UTLB_REPLAY_MISS",
-        "T88x_TI_UTLB_STALL",
+    /*Tiler */
+    "",
+    "",
+    "",
+    "T88x_TI_JOBS_PROCESSED",
+    "T88x_TI_TRIANGLES",
+    "T88x_TI_QUADS",
+    "T88x_TI_POLYGONS",
+    "T88x_TI_POINTS",
+    "T88x_TI_LINES",
+    "T88x_TI_VCACHE_HIT",
+    "T88x_TI_VCACHE_MISS",
+    "T88x_TI_FRONT_FACING",
+    "T88x_TI_BACK_FACING",
+    "T88x_TI_PRIM_VISIBLE",
+    "T88x_TI_PRIM_CULLED",
+    "T88x_TI_PRIM_CLIPPED",
+    "T88x_TI_LEVEL0",
+    "T88x_TI_LEVEL1",
+    "T88x_TI_LEVEL2",
+    "T88x_TI_LEVEL3",
+    "T88x_TI_LEVEL4",
+    "T88x_TI_LEVEL5",
+    "T88x_TI_LEVEL6",
+    "T88x_TI_LEVEL7",
+    "T88x_TI_COMMAND_1",
+    "T88x_TI_COMMAND_2",
+    "T88x_TI_COMMAND_3",
+    "T88x_TI_COMMAND_4",
+    "T88x_TI_COMMAND_5_7",
+    "T88x_TI_COMMAND_8_15",
+    "T88x_TI_COMMAND_16_63",
+    "T88x_TI_COMMAND_64",
+    "T88x_TI_COMPRESS_IN",
+    "T88x_TI_COMPRESS_OUT",
+    "T88x_TI_COMPRESS_FLUSH",
+    "T88x_TI_TIMESTAMPS",
+    "T88x_TI_PCACHE_HIT",
+    "T88x_TI_PCACHE_MISS",
+    "T88x_TI_PCACHE_LINE",
+    "T88x_TI_PCACHE_STALL",
+    "T88x_TI_WRBUF_HIT",
+    "T88x_TI_WRBUF_MISS",
+    "T88x_TI_WRBUF_LINE",
+    "T88x_TI_WRBUF_PARTIAL",
+    "T88x_TI_WRBUF_STALL",
+    "T88x_TI_ACTIVE",
+    "T88x_TI_LOADING_DESC",
+    "T88x_TI_INDEX_WAIT",
+    "T88x_TI_INDEX_RANGE_WAIT",
+    "T88x_TI_VERTEX_WAIT",
+    "T88x_TI_PCACHE_WAIT",
+    "T88x_TI_WRBUF_WAIT",
+    "T88x_TI_BUS_READ",
+    "T88x_TI_BUS_WRITE",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "T88x_TI_UTLB_HIT",
+    "T88x_TI_UTLB_NEW_MISS",
+    "T88x_TI_UTLB_REPLAY_FULL",
+    "T88x_TI_UTLB_REPLAY_MISS",
+    "T88x_TI_UTLB_STALL",
 
-        /* Shader Core */
-        "",
-        "",
-        "",
-        "",
-        "T88x_FRAG_ACTIVE",
-        "T88x_FRAG_PRIMITIVES",
-        "T88x_FRAG_PRIMITIVES_DROPPED",
-        "T88x_FRAG_CYCLES_DESC",
-        "T88x_FRAG_CYCLES_FPKQ_ACTIVE",
-        "T88x_FRAG_CYCLES_VERT",
-        "T88x_FRAG_CYCLES_TRISETUP",
-        "T88x_FRAG_CYCLES_EZS_ACTIVE",
-        "T88x_FRAG_THREADS",
-        "T88x_FRAG_DUMMY_THREADS",
-        "T88x_FRAG_QUADS_RAST",
-        "T88x_FRAG_QUADS_EZS_TEST",
-        "T88x_FRAG_QUADS_EZS_KILLED",
-        "T88x_FRAG_THREADS_LZS_TEST",
-        "T88x_FRAG_THREADS_LZS_KILLED",
-        "T88x_FRAG_CYCLES_NO_TILE",
-        "T88x_FRAG_NUM_TILES",
-        "T88x_FRAG_TRANS_ELIM",
-        "T88x_COMPUTE_ACTIVE",
-        "T88x_COMPUTE_TASKS",
-        "T88x_COMPUTE_THREADS",
-        "T88x_COMPUTE_CYCLES_DESC",
-        "T88x_TRIPIPE_ACTIVE",
-        "T88x_ARITH_WORDS",
-        "T88x_ARITH_CYCLES_REG",
-        "T88x_ARITH_CYCLES_L0",
-        "T88x_ARITH_FRAG_DEPEND",
-        "T88x_LS_WORDS",
-        "T88x_LS_ISSUES",
-        "T88x_LS_REISSUE_ATTR",
-        "T88x_LS_REISSUES_VARY",
-        "T88x_LS_VARY_RV_MISS",
-        "T88x_LS_VARY_RV_HIT",
-        "T88x_LS_NO_UNPARK",
-        "T88x_TEX_WORDS",
-        "T88x_TEX_BUBBLES",
-        "T88x_TEX_WORDS_L0",
-        "T88x_TEX_WORDS_DESC",
-        "T88x_TEX_ISSUES",
-        "T88x_TEX_RECIRC_FMISS",
-        "T88x_TEX_RECIRC_DESC",
-        "T88x_TEX_RECIRC_MULTI",
-        "T88x_TEX_RECIRC_PMISS",
-        "T88x_TEX_RECIRC_CONF",
-        "T88x_LSC_READ_HITS",
-        "T88x_LSC_READ_OP",
-        "T88x_LSC_WRITE_HITS",
-        "T88x_LSC_WRITE_OP",
-        "T88x_LSC_ATOMIC_HITS",
-        "T88x_LSC_ATOMIC_OP",
-        "T88x_LSC_LINE_FETCHES",
-        "T88x_LSC_DIRTY_LINE",
-        "T88x_LSC_SNOOPS",
-        "T88x_AXI_TLB_STALL",
-        "T88x_AXI_TLB_MISS",
-        "T88x_AXI_TLB_TRANSACTION",
-        "T88x_LS_TLB_MISS",
-        "T88x_LS_TLB_HIT",
-        "T88x_AXI_BEATS_READ",
-        "T88x_AXI_BEATS_WRITTEN",
+    /* Shader Core */
+    "",
+    "",
+    "",
+    "",
+    "T88x_FRAG_ACTIVE",
+    "T88x_FRAG_PRIMITIVES",
+    "T88x_FRAG_PRIMITIVES_DROPPED",
+    "T88x_FRAG_CYCLES_DESC",
+    "T88x_FRAG_CYCLES_FPKQ_ACTIVE",
+    "T88x_FRAG_CYCLES_VERT",
+    "T88x_FRAG_CYCLES_TRISETUP",
+    "T88x_FRAG_CYCLES_EZS_ACTIVE",
+    "T88x_FRAG_THREADS",
+    "T88x_FRAG_DUMMY_THREADS",
+    "T88x_FRAG_QUADS_RAST",
+    "T88x_FRAG_QUADS_EZS_TEST",
+    "T88x_FRAG_QUADS_EZS_KILLED",
+    "T88x_FRAG_THREADS_LZS_TEST",
+    "T88x_FRAG_THREADS_LZS_KILLED",
+    "T88x_FRAG_CYCLES_NO_TILE",
+    "T88x_FRAG_NUM_TILES",
+    "T88x_FRAG_TRANS_ELIM",
+    "T88x_COMPUTE_ACTIVE",
+    "T88x_COMPUTE_TASKS",
+    "T88x_COMPUTE_THREADS",
+    "T88x_COMPUTE_CYCLES_DESC",
+    "T88x_TRIPIPE_ACTIVE",
+    "T88x_ARITH_WORDS",
+    "T88x_ARITH_CYCLES_REG",
+    "T88x_ARITH_CYCLES_L0",
+    "T88x_ARITH_FRAG_DEPEND",
+    "T88x_LS_WORDS",
+    "T88x_LS_ISSUES",
+    "T88x_LS_REISSUE_ATTR",
+    "T88x_LS_REISSUES_VARY",
+    "T88x_LS_VARY_RV_MISS",
+    "T88x_LS_VARY_RV_HIT",
+    "T88x_LS_NO_UNPARK",
+    "T88x_TEX_WORDS",
+    "T88x_TEX_BUBBLES",
+    "T88x_TEX_WORDS_L0",
+    "T88x_TEX_WORDS_DESC",
+    "T88x_TEX_ISSUES",
+    "T88x_TEX_RECIRC_FMISS",
+    "T88x_TEX_RECIRC_DESC",
+    "T88x_TEX_RECIRC_MULTI",
+    "T88x_TEX_RECIRC_PMISS",
+    "T88x_TEX_RECIRC_CONF",
+    "T88x_LSC_READ_HITS",
+    "T88x_LSC_READ_OP",
+    "T88x_LSC_WRITE_HITS",
+    "T88x_LSC_WRITE_OP",
+    "T88x_LSC_ATOMIC_HITS",
+    "T88x_LSC_ATOMIC_OP",
+    "T88x_LSC_LINE_FETCHES",
+    "T88x_LSC_DIRTY_LINE",
+    "T88x_LSC_SNOOPS",
+    "T88x_AXI_TLB_STALL",
+    "T88x_AXI_TLB_MISS",
+    "T88x_AXI_TLB_TRANSACTION",
+    "T88x_LS_TLB_MISS",
+    "T88x_LS_TLB_HIT",
+    "T88x_AXI_BEATS_READ",
+    "T88x_AXI_BEATS_WRITTEN",
 
-        /*L2 and MMU */
-        "",
-        "",
-        "",
-        "",
-        "T88x_MMU_HIT",
-        "T88x_MMU_NEW_MISS",
-        "T88x_MMU_REPLAY_FULL",
-        "T88x_MMU_REPLAY_MISS",
-        "T88x_MMU_TABLE_WALK",
-        "T88x_MMU_REQUESTS",
-        "",
-        "",
-        "T88x_UTLB_HIT",
-        "T88x_UTLB_NEW_MISS",
-        "T88x_UTLB_REPLAY_FULL",
-        "T88x_UTLB_REPLAY_MISS",
-        "T88x_UTLB_STALL",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "T88x_L2_EXT_WRITE_BEATS",
-        "T88x_L2_EXT_READ_BEATS",
-        "T88x_L2_ANY_LOOKUP",
-        "T88x_L2_READ_LOOKUP",
-        "T88x_L2_SREAD_LOOKUP",
-        "T88x_L2_READ_REPLAY",
-        "T88x_L2_READ_SNOOP",
-        "T88x_L2_READ_HIT",
-        "T88x_L2_CLEAN_MISS",
-        "T88x_L2_WRITE_LOOKUP",
-        "T88x_L2_SWRITE_LOOKUP",
-        "T88x_L2_WRITE_REPLAY",
-        "T88x_L2_WRITE_SNOOP",
-        "T88x_L2_WRITE_HIT",
-        "T88x_L2_EXT_READ_FULL",
-        "",
-        "T88x_L2_EXT_WRITE_FULL",
-        "T88x_L2_EXT_R_W_HAZARD",
-        "T88x_L2_EXT_READ",
-        "T88x_L2_EXT_READ_LINE",
-        "T88x_L2_EXT_WRITE",
-        "T88x_L2_EXT_WRITE_LINE",
-        "T88x_L2_EXT_WRITE_SMALL",
-        "T88x_L2_EXT_BARRIER",
-        "T88x_L2_EXT_AR_STALL",
-        "T88x_L2_EXT_R_BUF_FULL",
-        "T88x_L2_EXT_RD_BUF_FULL",
-        "T88x_L2_EXT_R_RAW",
-        "T88x_L2_EXT_W_STALL",
-        "T88x_L2_EXT_W_BUF_FULL",
-        "T88x_L2_EXT_R_BUF_FULL",
-        "T88x_L2_TAG_HAZARD",
-        "T88x_L2_SNOOP_FULL",
-        "T88x_L2_REPLAY_FULL"
-    };
+    /*L2 and MMU */
+    "",
+    "",
+    "",
+    "",
+    "T88x_MMU_HIT",
+    "T88x_MMU_NEW_MISS",
+    "T88x_MMU_REPLAY_FULL",
+    "T88x_MMU_REPLAY_MISS",
+    "T88x_MMU_TABLE_WALK",
+    "T88x_MMU_REQUESTS",
+    "",
+    "",
+    "T88x_UTLB_HIT",
+    "T88x_UTLB_NEW_MISS",
+    "T88x_UTLB_REPLAY_FULL",
+    "T88x_UTLB_REPLAY_MISS",
+    "T88x_UTLB_STALL",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "T88x_L2_EXT_WRITE_BEATS",
+    "T88x_L2_EXT_READ_BEATS",
+    "T88x_L2_ANY_LOOKUP",
+    "T88x_L2_READ_LOOKUP",
+    "T88x_L2_SREAD_LOOKUP",
+    "T88x_L2_READ_REPLAY",
+    "T88x_L2_READ_SNOOP",
+    "T88x_L2_READ_HIT",
+    "T88x_L2_CLEAN_MISS",
+    "T88x_L2_WRITE_LOOKUP",
+    "T88x_L2_SWRITE_LOOKUP",
+    "T88x_L2_WRITE_REPLAY",
+    "T88x_L2_WRITE_SNOOP",
+    "T88x_L2_WRITE_HIT",
+    "T88x_L2_EXT_READ_FULL",
+    "",
+    "T88x_L2_EXT_WRITE_FULL",
+    "T88x_L2_EXT_R_W_HAZARD",
+    "T88x_L2_EXT_READ",
+    "T88x_L2_EXT_READ_LINE",
+    "T88x_L2_EXT_WRITE",
+    "T88x_L2_EXT_WRITE_LINE",
+    "T88x_L2_EXT_WRITE_SMALL",
+    "T88x_L2_EXT_BARRIER",
+    "T88x_L2_EXT_AR_STALL",
+    "T88x_L2_EXT_R_BUF_FULL",
+    "T88x_L2_EXT_RD_BUF_FULL",
+    "T88x_L2_EXT_R_RAW",
+    "T88x_L2_EXT_W_STALL",
+    "T88x_L2_EXT_W_BUF_FULL",
+    "T88x_L2_EXT_R_BUF_FULL",
+    "T88x_L2_TAG_HAZARD",
+    "T88x_L2_SNOOP_FULL",
+    "T88x_L2_REPLAY_FULL"
+};
 
-    static const char * const hardware_counters_mali_tHEx[] = {
-        /* Performance counters for the Job Manager */
-        "",
-        "",
-        "",
-        "",
-        "THEx_MESSAGES_SENT",
-        "THEx_MESSAGES_RECEIVED",
-        "THEx_GPU_ACTIVE",
-        "THEx_IRQ_ACTIVE",
-        "THEx_JS0_JOBS",
-        "THEx_JS0_TASKS",
-        "THEx_JS0_ACTIVE",
-        "",
-        "THEx_JS0_WAIT_READ",
-        "THEx_JS0_WAIT_ISSUE",
-        "THEx_JS0_WAIT_DEPEND",
-        "THEx_JS0_WAIT_FINISH",
-        "THEx_JS1_JOBS",
-        "THEx_JS1_TASKS",
-        "THEx_JS1_ACTIVE",
-        "",
-        "THEx_JS1_WAIT_READ",
-        "THEx_JS1_WAIT_ISSUE",
-        "THEx_JS1_WAIT_DEPEND",
-        "THEx_JS1_WAIT_FINISH",
-        "THEx_JS2_JOBS",
-        "THEx_JS2_TASKS",
-        "THEx_JS2_ACTIVE",
-        "",
-        "THEx_JS2_WAIT_READ",
-        "THEx_JS2_WAIT_ISSUE",
-        "THEx_JS2_WAIT_DEPEND",
-        "THEx_JS2_WAIT_FINISH",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
+static const char *const hardware_counters_mali_tHEx[] =
+{
+    /* Performance counters for the Job Manager */
+    "",
+    "",
+    "",
+    "",
+    "THEx_MESSAGES_SENT",
+    "THEx_MESSAGES_RECEIVED",
+    "THEx_GPU_ACTIVE",
+    "THEx_IRQ_ACTIVE",
+    "THEx_JS0_JOBS",
+    "THEx_JS0_TASKS",
+    "THEx_JS0_ACTIVE",
+    "",
+    "THEx_JS0_WAIT_READ",
+    "THEx_JS0_WAIT_ISSUE",
+    "THEx_JS0_WAIT_DEPEND",
+    "THEx_JS0_WAIT_FINISH",
+    "THEx_JS1_JOBS",
+    "THEx_JS1_TASKS",
+    "THEx_JS1_ACTIVE",
+    "",
+    "THEx_JS1_WAIT_READ",
+    "THEx_JS1_WAIT_ISSUE",
+    "THEx_JS1_WAIT_DEPEND",
+    "THEx_JS1_WAIT_FINISH",
+    "THEx_JS2_JOBS",
+    "THEx_JS2_TASKS",
+    "THEx_JS2_ACTIVE",
+    "",
+    "THEx_JS2_WAIT_READ",
+    "THEx_JS2_WAIT_ISSUE",
+    "THEx_JS2_WAIT_DEPEND",
+    "THEx_JS2_WAIT_FINISH",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
 
-        /* Performance counters for the Tiler */
-        "",
-        "",
-        "",
-        "",
-        "THEx_TILER_ACTIVE",
-        "THEx_JOBS_PROCESSED",
-        "THEx_TRIANGLES",
-        "THEx_LINES",
-        "THEx_POINTS",
-        "THEx_FRONT_FACING",
-        "THEx_BACK_FACING",
-        "THEx_PRIM_VISIBLE",
-        "THEx_PRIM_CULLED",
-        "THEx_PRIM_CLIPPED",
-        "THEx_PRIM_SAT_CULLED",
-        "",
-        "",
-        "THEx_BUS_READ",
-        "",
-        "THEx_BUS_WRITE",
-        "THEx_LOADING_DESC",
-        "THEx_IDVS_POS_SHAD_REQ",
-        "THEx_IDVS_POS_SHAD_WAIT",
-        "THEx_IDVS_POS_SHAD_STALL",
-        "THEx_IDVS_POS_FIFO_FULL",
-        "THEx_PREFETCH_STALL",
-        "THEx_VCACHE_HIT",
-        "THEx_VCACHE_MISS",
-        "THEx_VCACHE_LINE_WAIT",
-        "THEx_VFETCH_POS_READ_WAIT",
-        "THEx_VFETCH_VERTEX_WAIT",
-        "THEx_VFETCH_STALL",
-        "THEx_PRIMASSY_STALL",
-        "THEx_BBOX_GEN_STALL",
-        "THEx_IDVS_VBU_HIT",
-        "THEx_IDVS_VBU_MISS",
-        "THEx_IDVS_VBU_LINE_DEALLOCATE",
-        "THEx_IDVS_VAR_SHAD_REQ",
-        "THEx_IDVS_VAR_SHAD_STALL",
-        "THEx_BINNER_STALL",
-        "THEx_ITER_STALL",
-        "THEx_COMPRESS_MISS",
-        "THEx_COMPRESS_STALL",
-        "THEx_PCACHE_HIT",
-        "THEx_PCACHE_MISS",
-        "THEx_PCACHE_MISS_STALL",
-        "THEx_PCACHE_EVICT_STALL",
-        "THEx_PMGR_PTR_WR_STALL",
-        "THEx_PMGR_PTR_RD_STALL",
-        "THEx_PMGR_CMD_WR_STALL",
-        "THEx_WRBUF_ACTIVE",
-        "THEx_WRBUF_HIT",
-        "THEx_WRBUF_MISS",
-        "THEx_WRBUF_NO_FREE_LINE_STALL",
-        "THEx_WRBUF_NO_AXI_ID_STALL",
-        "THEx_WRBUF_AXI_STALL",
-        "",
-        "",
-        "",
-        "THEx_UTLB_TRANS",
-        "THEx_UTLB_TRANS_HIT",
-        "THEx_UTLB_TRANS_STALL",
-        "THEx_UTLB_TRANS_MISS_DELAY",
-        "THEx_UTLB_MMU_REQ",
+    /* Performance counters for the Tiler */
+    "",
+    "",
+    "",
+    "",
+    "THEx_TILER_ACTIVE",
+    "THEx_JOBS_PROCESSED",
+    "THEx_TRIANGLES",
+    "THEx_LINES",
+    "THEx_POINTS",
+    "THEx_FRONT_FACING",
+    "THEx_BACK_FACING",
+    "THEx_PRIM_VISIBLE",
+    "THEx_PRIM_CULLED",
+    "THEx_PRIM_CLIPPED",
+    "THEx_PRIM_SAT_CULLED",
+    "",
+    "",
+    "THEx_BUS_READ",
+    "",
+    "THEx_BUS_WRITE",
+    "THEx_LOADING_DESC",
+    "THEx_IDVS_POS_SHAD_REQ",
+    "THEx_IDVS_POS_SHAD_WAIT",
+    "THEx_IDVS_POS_SHAD_STALL",
+    "THEx_IDVS_POS_FIFO_FULL",
+    "THEx_PREFETCH_STALL",
+    "THEx_VCACHE_HIT",
+    "THEx_VCACHE_MISS",
+    "THEx_VCACHE_LINE_WAIT",
+    "THEx_VFETCH_POS_READ_WAIT",
+    "THEx_VFETCH_VERTEX_WAIT",
+    "THEx_VFETCH_STALL",
+    "THEx_PRIMASSY_STALL",
+    "THEx_BBOX_GEN_STALL",
+    "THEx_IDVS_VBU_HIT",
+    "THEx_IDVS_VBU_MISS",
+    "THEx_IDVS_VBU_LINE_DEALLOCATE",
+    "THEx_IDVS_VAR_SHAD_REQ",
+    "THEx_IDVS_VAR_SHAD_STALL",
+    "THEx_BINNER_STALL",
+    "THEx_ITER_STALL",
+    "THEx_COMPRESS_MISS",
+    "THEx_COMPRESS_STALL",
+    "THEx_PCACHE_HIT",
+    "THEx_PCACHE_MISS",
+    "THEx_PCACHE_MISS_STALL",
+    "THEx_PCACHE_EVICT_STALL",
+    "THEx_PMGR_PTR_WR_STALL",
+    "THEx_PMGR_PTR_RD_STALL",
+    "THEx_PMGR_CMD_WR_STALL",
+    "THEx_WRBUF_ACTIVE",
+    "THEx_WRBUF_HIT",
+    "THEx_WRBUF_MISS",
+    "THEx_WRBUF_NO_FREE_LINE_STALL",
+    "THEx_WRBUF_NO_AXI_ID_STALL",
+    "THEx_WRBUF_AXI_STALL",
+    "",
+    "",
+    "",
+    "THEx_UTLB_TRANS",
+    "THEx_UTLB_TRANS_HIT",
+    "THEx_UTLB_TRANS_STALL",
+    "THEx_UTLB_TRANS_MISS_DELAY",
+    "THEx_UTLB_MMU_REQ",
 
-        /* Performance counters for the Shader Core */
-        "",
-        "",
-        "",
-        "",
-        "THEx_FRAG_ACTIVE",
-        "THEx_FRAG_PRIMITIVES",
-        "THEx_FRAG_PRIM_RAST",
-        "THEx_FRAG_FPK_ACTIVE",
-        "THEx_FRAG_STARVING",
-        "THEx_FRAG_WARPS",
-        "THEx_FRAG_PARTIAL_WARPS",
-        "THEx_FRAG_QUADS_RAST",
-        "THEx_FRAG_QUADS_EZS_TEST",
-        "THEx_FRAG_QUADS_EZS_UPDATE",
-        "THEx_FRAG_QUADS_EZS_KILL",
-        "THEx_FRAG_LZS_TEST",
-        "THEx_FRAG_LZS_KILL",
-        "",
-        "THEx_FRAG_PTILES",
-        "THEx_FRAG_TRANS_ELIM",
-        "THEx_QUAD_FPK_KILLER",
-        "",
-        "THEx_COMPUTE_ACTIVE",
-        "THEx_COMPUTE_TASKS",
-        "THEx_COMPUTE_WARPS",
-        "THEx_COMPUTE_STARVING",
-        "THEx_EXEC_CORE_ACTIVE",
-        "THEx_EXEC_ACTIVE",
-        "THEx_EXEC_INSTR_COUNT",
-        "THEx_EXEC_INSTR_DIVERGED",
-        "THEx_EXEC_INSTR_STARVING",
-        "THEx_ARITH_INSTR_SINGLE_FMA",
-        "THEx_ARITH_INSTR_DOUBLE",
-        "THEx_ARITH_INSTR_MSG",
-        "THEx_ARITH_INSTR_MSG_ONLY",
-        "THEx_TEX_INSTR",
-        "THEx_TEX_INSTR_MIPMAP",
-        "THEx_TEX_INSTR_COMPRESSED",
-        "THEx_TEX_INSTR_3D",
-        "THEx_TEX_INSTR_TRILINEAR",
-        "THEx_TEX_COORD_ISSUE",
-        "THEx_TEX_COORD_STALL",
-        "THEx_TEX_STARVE_CACHE",
-        "THEx_TEX_STARVE_FILTER",
-        "THEx_LS_MEM_READ_FULL",
-        "THEx_LS_MEM_READ_SHORT",
-        "THEx_LS_MEM_WRITE_FULL",
-        "THEx_LS_MEM_WRITE_SHORT",
-        "THEx_LS_MEM_ATOMIC",
-        "THEx_VARY_INSTR",
-        "THEx_VARY_SLOT_32",
-        "THEx_VARY_SLOT_16",
-        "THEx_ATTR_INSTR",
-        "THEx_ARITH_INSTR_FP_MUL",
-        "THEx_BEATS_RD_FTC",
-        "THEx_BEATS_RD_FTC_EXT",
-        "THEx_BEATS_RD_LSC",
-        "THEx_BEATS_RD_LSC_EXT",
-        "THEx_BEATS_RD_TEX",
-        "THEx_BEATS_RD_TEX_EXT",
-        "THEx_BEATS_RD_OTHER",
-        "THEx_BEATS_WR_LSC",
-        "THEx_BEATS_WR_TIB",
-        "",
+    /* Performance counters for the Shader Core */
+    "",
+    "",
+    "",
+    "",
+    "THEx_FRAG_ACTIVE",
+    "THEx_FRAG_PRIMITIVES",
+    "THEx_FRAG_PRIM_RAST",
+    "THEx_FRAG_FPK_ACTIVE",
+    "THEx_FRAG_STARVING",
+    "THEx_FRAG_WARPS",
+    "THEx_FRAG_PARTIAL_WARPS",
+    "THEx_FRAG_QUADS_RAST",
+    "THEx_FRAG_QUADS_EZS_TEST",
+    "THEx_FRAG_QUADS_EZS_UPDATE",
+    "THEx_FRAG_QUADS_EZS_KILL",
+    "THEx_FRAG_LZS_TEST",
+    "THEx_FRAG_LZS_KILL",
+    "",
+    "THEx_FRAG_PTILES",
+    "THEx_FRAG_TRANS_ELIM",
+    "THEx_QUAD_FPK_KILLER",
+    "",
+    "THEx_COMPUTE_ACTIVE",
+    "THEx_COMPUTE_TASKS",
+    "THEx_COMPUTE_WARPS",
+    "THEx_COMPUTE_STARVING",
+    "THEx_EXEC_CORE_ACTIVE",
+    "THEx_EXEC_ACTIVE",
+    "THEx_EXEC_INSTR_COUNT",
+    "THEx_EXEC_INSTR_DIVERGED",
+    "THEx_EXEC_INSTR_STARVING",
+    "THEx_ARITH_INSTR_SINGLE_FMA",
+    "THEx_ARITH_INSTR_DOUBLE",
+    "THEx_ARITH_INSTR_MSG",
+    "THEx_ARITH_INSTR_MSG_ONLY",
+    "THEx_TEX_INSTR",
+    "THEx_TEX_INSTR_MIPMAP",
+    "THEx_TEX_INSTR_COMPRESSED",
+    "THEx_TEX_INSTR_3D",
+    "THEx_TEX_INSTR_TRILINEAR",
+    "THEx_TEX_COORD_ISSUE",
+    "THEx_TEX_COORD_STALL",
+    "THEx_TEX_STARVE_CACHE",
+    "THEx_TEX_STARVE_FILTER",
+    "THEx_LS_MEM_READ_FULL",
+    "THEx_LS_MEM_READ_SHORT",
+    "THEx_LS_MEM_WRITE_FULL",
+    "THEx_LS_MEM_WRITE_SHORT",
+    "THEx_LS_MEM_ATOMIC",
+    "THEx_VARY_INSTR",
+    "THEx_VARY_SLOT_32",
+    "THEx_VARY_SLOT_16",
+    "THEx_ATTR_INSTR",
+    "THEx_ARITH_INSTR_FP_MUL",
+    "THEx_BEATS_RD_FTC",
+    "THEx_BEATS_RD_FTC_EXT",
+    "THEx_BEATS_RD_LSC",
+    "THEx_BEATS_RD_LSC_EXT",
+    "THEx_BEATS_RD_TEX",
+    "THEx_BEATS_RD_TEX_EXT",
+    "THEx_BEATS_RD_OTHER",
+    "THEx_BEATS_WR_LSC",
+    "THEx_BEATS_WR_TIB",
+    "",
 
-        /* Performance counters for the Memory System */
-        "",
-        "",
-        "",
-        "",
-        "THEx_MMU_REQUESTS",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "THEx_L2_RD_MSG_IN",
-        "THEx_L2_RD_MSG_IN_STALL",
-        "THEx_L2_WR_MSG_IN",
-        "THEx_L2_WR_MSG_IN_STALL",
-        "THEx_L2_SNP_MSG_IN",
-        "THEx_L2_SNP_MSG_IN_STALL",
-        "THEx_L2_RD_MSG_OUT",
-        "THEx_L2_RD_MSG_OUT_STALL",
-        "THEx_L2_WR_MSG_OUT",
-        "THEx_L2_ANY_LOOKUP",
-        "THEx_L2_READ_LOOKUP",
-        "THEx_L2_WRITE_LOOKUP",
-        "THEx_L2_EXT_SNOOP_LOOKUP",
-        "THEx_L2_EXT_READ",
-        "THEx_L2_EXT_READ_NOSNP",
-        "THEx_L2_EXT_READ_UNIQUE",
-        "THEx_L2_EXT_READ_BEATS",
-        "THEx_L2_EXT_AR_STALL",
-        "THEx_L2_EXT_AR_CNT_Q1",
-        "THEx_L2_EXT_AR_CNT_Q2",
-        "THEx_L2_EXT_AR_CNT_Q3",
-        "THEx_L2_EXT_RRESP_0_127",
-        "THEx_L2_EXT_RRESP_128_191",
-        "THEx_L2_EXT_RRESP_192_255",
-        "THEx_L2_EXT_RRESP_256_319",
-        "THEx_L2_EXT_RRESP_320_383",
-        "THEx_L2_EXT_WRITE",
-        "THEx_L2_EXT_WRITE_NOSNP_FULL",
-        "THEx_L2_EXT_WRITE_NOSNP_PTL",
-        "THEx_L2_EXT_WRITE_SNP_FULL",
-        "THEx_L2_EXT_WRITE_SNP_PTL",
-        "THEx_L2_EXT_WRITE_BEATS",
-        "THEx_L2_EXT_W_STALL",
-        "THEx_L2_EXT_AW_CNT_Q1",
-        "THEx_L2_EXT_AW_CNT_Q2",
-        "THEx_L2_EXT_AW_CNT_Q3",
-        "THEx_L2_EXT_SNOOP",
-        "THEx_L2_EXT_SNOOP_STALL",
-        "THEx_L2_EXT_SNOOP_RESP_CLEAN",
-        "THEx_L2_EXT_SNOOP_RESP_DATA",
-        "THEx_L2_EXT_SNOOP_INTERNAL",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-    };
+    /* Performance counters for the Memory System */
+    "",
+    "",
+    "",
+    "",
+    "THEx_MMU_REQUESTS",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "THEx_L2_RD_MSG_IN",
+    "THEx_L2_RD_MSG_IN_STALL",
+    "THEx_L2_WR_MSG_IN",
+    "THEx_L2_WR_MSG_IN_STALL",
+    "THEx_L2_SNP_MSG_IN",
+    "THEx_L2_SNP_MSG_IN_STALL",
+    "THEx_L2_RD_MSG_OUT",
+    "THEx_L2_RD_MSG_OUT_STALL",
+    "THEx_L2_WR_MSG_OUT",
+    "THEx_L2_ANY_LOOKUP",
+    "THEx_L2_READ_LOOKUP",
+    "THEx_L2_WRITE_LOOKUP",
+    "THEx_L2_EXT_SNOOP_LOOKUP",
+    "THEx_L2_EXT_READ",
+    "THEx_L2_EXT_READ_NOSNP",
+    "THEx_L2_EXT_READ_UNIQUE",
+    "THEx_L2_EXT_READ_BEATS",
+    "THEx_L2_EXT_AR_STALL",
+    "THEx_L2_EXT_AR_CNT_Q1",
+    "THEx_L2_EXT_AR_CNT_Q2",
+    "THEx_L2_EXT_AR_CNT_Q3",
+    "THEx_L2_EXT_RRESP_0_127",
+    "THEx_L2_EXT_RRESP_128_191",
+    "THEx_L2_EXT_RRESP_192_255",
+    "THEx_L2_EXT_RRESP_256_319",
+    "THEx_L2_EXT_RRESP_320_383",
+    "THEx_L2_EXT_WRITE",
+    "THEx_L2_EXT_WRITE_NOSNP_FULL",
+    "THEx_L2_EXT_WRITE_NOSNP_PTL",
+    "THEx_L2_EXT_WRITE_SNP_FULL",
+    "THEx_L2_EXT_WRITE_SNP_PTL",
+    "THEx_L2_EXT_WRITE_BEATS",
+    "THEx_L2_EXT_W_STALL",
+    "THEx_L2_EXT_AW_CNT_Q1",
+    "THEx_L2_EXT_AW_CNT_Q2",
+    "THEx_L2_EXT_AW_CNT_Q3",
+    "THEx_L2_EXT_SNOOP",
+    "THEx_L2_EXT_SNOOP_STALL",
+    "THEx_L2_EXT_SNOOP_RESP_CLEAN",
+    "THEx_L2_EXT_SNOOP_RESP_DATA",
+    "THEx_L2_EXT_SNOOP_INTERNAL",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+};
 
-    static const char * const hardware_counters_mali_tMIx[] = {
-        /* Performance counters for the Job Manager */
-        "",
-        "",
-        "",
-        "",
-        "TMIx_MESSAGES_SENT",
-        "TMIx_MESSAGES_RECEIVED",
-        "TMIx_GPU_ACTIVE",
-        "TMIx_IRQ_ACTIVE",
-        "TMIx_JS0_JOBS",
-        "TMIx_JS0_TASKS",
-        "TMIx_JS0_ACTIVE",
-        "",
-        "TMIx_JS0_WAIT_READ",
-        "TMIx_JS0_WAIT_ISSUE",
-        "TMIx_JS0_WAIT_DEPEND",
-        "TMIx_JS0_WAIT_FINISH",
-        "TMIx_JS1_JOBS",
-        "TMIx_JS1_TASKS",
-        "TMIx_JS1_ACTIVE",
-        "",
-        "TMIx_JS1_WAIT_READ",
-        "TMIx_JS1_WAIT_ISSUE",
-        "TMIx_JS1_WAIT_DEPEND",
-        "TMIx_JS1_WAIT_FINISH",
-        "TMIx_JS2_JOBS",
-        "TMIx_JS2_TASKS",
-        "TMIx_JS2_ACTIVE",
-        "",
-        "TMIx_JS2_WAIT_READ",
-        "TMIx_JS2_WAIT_ISSUE",
-        "TMIx_JS2_WAIT_DEPEND",
-        "TMIx_JS2_WAIT_FINISH",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
+static const char *const hardware_counters_mali_tMIx[] =
+{
+    /* Performance counters for the Job Manager */
+    "",
+    "",
+    "",
+    "",
+    "TMIx_MESSAGES_SENT",
+    "TMIx_MESSAGES_RECEIVED",
+    "TMIx_GPU_ACTIVE",
+    "TMIx_IRQ_ACTIVE",
+    "TMIx_JS0_JOBS",
+    "TMIx_JS0_TASKS",
+    "TMIx_JS0_ACTIVE",
+    "",
+    "TMIx_JS0_WAIT_READ",
+    "TMIx_JS0_WAIT_ISSUE",
+    "TMIx_JS0_WAIT_DEPEND",
+    "TMIx_JS0_WAIT_FINISH",
+    "TMIx_JS1_JOBS",
+    "TMIx_JS1_TASKS",
+    "TMIx_JS1_ACTIVE",
+    "",
+    "TMIx_JS1_WAIT_READ",
+    "TMIx_JS1_WAIT_ISSUE",
+    "TMIx_JS1_WAIT_DEPEND",
+    "TMIx_JS1_WAIT_FINISH",
+    "TMIx_JS2_JOBS",
+    "TMIx_JS2_TASKS",
+    "TMIx_JS2_ACTIVE",
+    "",
+    "TMIx_JS2_WAIT_READ",
+    "TMIx_JS2_WAIT_ISSUE",
+    "TMIx_JS2_WAIT_DEPEND",
+    "TMIx_JS2_WAIT_FINISH",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
 
-        /* Performance counters for the Tiler */
-        "",
-        "",
-        "",
-        "",
-        "TMIx_TILER_ACTIVE",
-        "TMIx_JOBS_PROCESSED",
-        "TMIx_TRIANGLES",
-        "TMIx_LINES",
-        "TMIx_POINTS",
-        "TMIx_FRONT_FACING",
-        "TMIx_BACK_FACING",
-        "TMIx_PRIM_VISIBLE",
-        "TMIx_PRIM_CULLED",
-        "TMIx_PRIM_CLIPPED",
-        "TMIx_PRIM_SAT_CULLED",
-        "",
-        "",
-        "TMIx_BUS_READ",
-        "",
-        "TMIx_BUS_WRITE",
-        "TMIx_LOADING_DESC",
-        "TMIx_IDVS_POS_SHAD_REQ",
-        "TMIx_IDVS_POS_SHAD_WAIT",
-        "TMIx_IDVS_POS_SHAD_STALL",
-        "TMIx_IDVS_POS_FIFO_FULL",
-        "TMIx_PREFETCH_STALL",
-        "TMIx_VCACHE_HIT",
-        "TMIx_VCACHE_MISS",
-        "TMIx_VCACHE_LINE_WAIT",
-        "TMIx_VFETCH_POS_READ_WAIT",
-        "TMIx_VFETCH_VERTEX_WAIT",
-        "TMIx_VFETCH_STALL",
-        "TMIx_PRIMASSY_STALL",
-        "TMIx_BBOX_GEN_STALL",
-        "TMIx_IDVS_VBU_HIT",
-        "TMIx_IDVS_VBU_MISS",
-        "TMIx_IDVS_VBU_LINE_DEALLOCATE",
-        "TMIx_IDVS_VAR_SHAD_REQ",
-        "TMIx_IDVS_VAR_SHAD_STALL",
-        "TMIx_BINNER_STALL",
-        "TMIx_ITER_STALL",
-        "TMIx_COMPRESS_MISS",
-        "TMIx_COMPRESS_STALL",
-        "TMIx_PCACHE_HIT",
-        "TMIx_PCACHE_MISS",
-        "TMIx_PCACHE_MISS_STALL",
-        "TMIx_PCACHE_EVICT_STALL",
-        "TMIx_PMGR_PTR_WR_STALL",
-        "TMIx_PMGR_PTR_RD_STALL",
-        "TMIx_PMGR_CMD_WR_STALL",
-        "TMIx_WRBUF_ACTIVE",
-        "TMIx_WRBUF_HIT",
-        "TMIx_WRBUF_MISS",
-        "TMIx_WRBUF_NO_FREE_LINE_STALL",
-        "TMIx_WRBUF_NO_AXI_ID_STALL",
-        "TMIx_WRBUF_AXI_STALL",
-        "",
-        "",
-        "",
-        "TMIx_UTLB_TRANS",
-        "TMIx_UTLB_TRANS_HIT",
-        "TMIx_UTLB_TRANS_STALL",
-        "TMIx_UTLB_TRANS_MISS_DELAY",
-        "TMIx_UTLB_MMU_REQ",
+    /* Performance counters for the Tiler */
+    "",
+    "",
+    "",
+    "",
+    "TMIx_TILER_ACTIVE",
+    "TMIx_JOBS_PROCESSED",
+    "TMIx_TRIANGLES",
+    "TMIx_LINES",
+    "TMIx_POINTS",
+    "TMIx_FRONT_FACING",
+    "TMIx_BACK_FACING",
+    "TMIx_PRIM_VISIBLE",
+    "TMIx_PRIM_CULLED",
+    "TMIx_PRIM_CLIPPED",
+    "TMIx_PRIM_SAT_CULLED",
+    "",
+    "",
+    "TMIx_BUS_READ",
+    "",
+    "TMIx_BUS_WRITE",
+    "TMIx_LOADING_DESC",
+    "TMIx_IDVS_POS_SHAD_REQ",
+    "TMIx_IDVS_POS_SHAD_WAIT",
+    "TMIx_IDVS_POS_SHAD_STALL",
+    "TMIx_IDVS_POS_FIFO_FULL",
+    "TMIx_PREFETCH_STALL",
+    "TMIx_VCACHE_HIT",
+    "TMIx_VCACHE_MISS",
+    "TMIx_VCACHE_LINE_WAIT",
+    "TMIx_VFETCH_POS_READ_WAIT",
+    "TMIx_VFETCH_VERTEX_WAIT",
+    "TMIx_VFETCH_STALL",
+    "TMIx_PRIMASSY_STALL",
+    "TMIx_BBOX_GEN_STALL",
+    "TMIx_IDVS_VBU_HIT",
+    "TMIx_IDVS_VBU_MISS",
+    "TMIx_IDVS_VBU_LINE_DEALLOCATE",
+    "TMIx_IDVS_VAR_SHAD_REQ",
+    "TMIx_IDVS_VAR_SHAD_STALL",
+    "TMIx_BINNER_STALL",
+    "TMIx_ITER_STALL",
+    "TMIx_COMPRESS_MISS",
+    "TMIx_COMPRESS_STALL",
+    "TMIx_PCACHE_HIT",
+    "TMIx_PCACHE_MISS",
+    "TMIx_PCACHE_MISS_STALL",
+    "TMIx_PCACHE_EVICT_STALL",
+    "TMIx_PMGR_PTR_WR_STALL",
+    "TMIx_PMGR_PTR_RD_STALL",
+    "TMIx_PMGR_CMD_WR_STALL",
+    "TMIx_WRBUF_ACTIVE",
+    "TMIx_WRBUF_HIT",
+    "TMIx_WRBUF_MISS",
+    "TMIx_WRBUF_NO_FREE_LINE_STALL",
+    "TMIx_WRBUF_NO_AXI_ID_STALL",
+    "TMIx_WRBUF_AXI_STALL",
+    "",
+    "",
+    "",
+    "TMIx_UTLB_TRANS",
+    "TMIx_UTLB_TRANS_HIT",
+    "TMIx_UTLB_TRANS_STALL",
+    "TMIx_UTLB_TRANS_MISS_DELAY",
+    "TMIx_UTLB_MMU_REQ",
 
-        /* Performance counters for the Shader Core */
-        "",
-        "",
-        "",
-        "",
-        "TMIx_FRAG_ACTIVE",
-        "TMIx_FRAG_PRIMITIVES",
-        "TMIx_FRAG_PRIM_RAST",
-        "TMIx_FRAG_FPK_ACTIVE",
-        "TMIx_FRAG_STARVING",
-        "TMIx_FRAG_WARPS",
-        "TMIx_FRAG_PARTIAL_WARPS",
-        "TMIx_FRAG_QUADS_RAST",
-        "TMIx_FRAG_QUADS_EZS_TEST",
-        "TMIx_FRAG_QUADS_EZS_UPDATE",
-        "TMIx_FRAG_QUADS_EZS_KILL",
-        "TMIx_FRAG_LZS_TEST",
-        "TMIx_FRAG_LZS_KILL",
-        "",
-        "TMIx_FRAG_PTILES",
-        "TMIx_FRAG_TRANS_ELIM",
-        "TMIx_QUAD_FPK_KILLER",
-        "",
-        "TMIx_COMPUTE_ACTIVE",
-        "TMIx_COMPUTE_TASKS",
-        "TMIx_COMPUTE_WARPS",
-        "TMIx_COMPUTE_STARVING",
-        "TMIx_EXEC_CORE_ACTIVE",
-        "TMIx_EXEC_ACTIVE",
-        "TMIx_EXEC_INSTR_COUNT",
-        "TMIx_EXEC_INSTR_DIVERGED",
-        "TMIx_EXEC_INSTR_STARVING",
-        "TMIx_ARITH_INSTR_SINGLE_FMA",
-        "TMIx_ARITH_INSTR_DOUBLE",
-        "TMIx_ARITH_INSTR_MSG",
-        "TMIx_ARITH_INSTR_MSG_ONLY",
-        "TMIx_TEX_INSTR",
-        "TMIx_TEX_INSTR_MIPMAP",
-        "TMIx_TEX_INSTR_COMPRESSED",
-        "TMIx_TEX_INSTR_3D",
-        "TMIx_TEX_INSTR_TRILINEAR",
-        "TMIx_TEX_COORD_ISSUE",
-        "TMIx_TEX_COORD_STALL",
-        "TMIx_TEX_STARVE_CACHE",
-        "TMIx_TEX_STARVE_FILTER",
-        "TMIx_LS_MEM_READ_FULL",
-        "TMIx_LS_MEM_READ_SHORT",
-        "TMIx_LS_MEM_WRITE_FULL",
-        "TMIx_LS_MEM_WRITE_SHORT",
-        "TMIx_LS_MEM_ATOMIC",
-        "TMIx_VARY_INSTR",
-        "TMIx_VARY_SLOT_32",
-        "TMIx_VARY_SLOT_16",
-        "TMIx_ATTR_INSTR",
-        "TMIx_ARITH_INSTR_FP_MUL",
-        "TMIx_BEATS_RD_FTC",
-        "TMIx_BEATS_RD_FTC_EXT",
-        "TMIx_BEATS_RD_LSC",
-        "TMIx_BEATS_RD_LSC_EXT",
-        "TMIx_BEATS_RD_TEX",
-        "TMIx_BEATS_RD_TEX_EXT",
-        "TMIx_BEATS_RD_OTHER",
-        "TMIx_BEATS_WR_LSC",
-        "TMIx_BEATS_WR_TIB",
-        "",
+    /* Performance counters for the Shader Core */
+    "",
+    "",
+    "",
+    "",
+    "TMIx_FRAG_ACTIVE",
+    "TMIx_FRAG_PRIMITIVES",
+    "TMIx_FRAG_PRIM_RAST",
+    "TMIx_FRAG_FPK_ACTIVE",
+    "TMIx_FRAG_STARVING",
+    "TMIx_FRAG_WARPS",
+    "TMIx_FRAG_PARTIAL_WARPS",
+    "TMIx_FRAG_QUADS_RAST",
+    "TMIx_FRAG_QUADS_EZS_TEST",
+    "TMIx_FRAG_QUADS_EZS_UPDATE",
+    "TMIx_FRAG_QUADS_EZS_KILL",
+    "TMIx_FRAG_LZS_TEST",
+    "TMIx_FRAG_LZS_KILL",
+    "",
+    "TMIx_FRAG_PTILES",
+    "TMIx_FRAG_TRANS_ELIM",
+    "TMIx_QUAD_FPK_KILLER",
+    "",
+    "TMIx_COMPUTE_ACTIVE",
+    "TMIx_COMPUTE_TASKS",
+    "TMIx_COMPUTE_WARPS",
+    "TMIx_COMPUTE_STARVING",
+    "TMIx_EXEC_CORE_ACTIVE",
+    "TMIx_EXEC_ACTIVE",
+    "TMIx_EXEC_INSTR_COUNT",
+    "TMIx_EXEC_INSTR_DIVERGED",
+    "TMIx_EXEC_INSTR_STARVING",
+    "TMIx_ARITH_INSTR_SINGLE_FMA",
+    "TMIx_ARITH_INSTR_DOUBLE",
+    "TMIx_ARITH_INSTR_MSG",
+    "TMIx_ARITH_INSTR_MSG_ONLY",
+    "TMIx_TEX_INSTR",
+    "TMIx_TEX_INSTR_MIPMAP",
+    "TMIx_TEX_INSTR_COMPRESSED",
+    "TMIx_TEX_INSTR_3D",
+    "TMIx_TEX_INSTR_TRILINEAR",
+    "TMIx_TEX_COORD_ISSUE",
+    "TMIx_TEX_COORD_STALL",
+    "TMIx_TEX_STARVE_CACHE",
+    "TMIx_TEX_STARVE_FILTER",
+    "TMIx_LS_MEM_READ_FULL",
+    "TMIx_LS_MEM_READ_SHORT",
+    "TMIx_LS_MEM_WRITE_FULL",
+    "TMIx_LS_MEM_WRITE_SHORT",
+    "TMIx_LS_MEM_ATOMIC",
+    "TMIx_VARY_INSTR",
+    "TMIx_VARY_SLOT_32",
+    "TMIx_VARY_SLOT_16",
+    "TMIx_ATTR_INSTR",
+    "TMIx_ARITH_INSTR_FP_MUL",
+    "TMIx_BEATS_RD_FTC",
+    "TMIx_BEATS_RD_FTC_EXT",
+    "TMIx_BEATS_RD_LSC",
+    "TMIx_BEATS_RD_LSC_EXT",
+    "TMIx_BEATS_RD_TEX",
+    "TMIx_BEATS_RD_TEX_EXT",
+    "TMIx_BEATS_RD_OTHER",
+    "TMIx_BEATS_WR_LSC",
+    "TMIx_BEATS_WR_TIB",
+    "",
 
-        /* Performance counters for the Memory System */
-        "",
-        "",
-        "",
-        "",
-        "TMIx_MMU_REQUESTS",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "TMIx_L2_RD_MSG_IN",
-        "TMIx_L2_RD_MSG_IN_STALL",
-        "TMIx_L2_WR_MSG_IN",
-        "TMIx_L2_WR_MSG_IN_STALL",
-        "TMIx_L2_SNP_MSG_IN",
-        "TMIx_L2_SNP_MSG_IN_STALL",
-        "TMIx_L2_RD_MSG_OUT",
-        "TMIx_L2_RD_MSG_OUT_STALL",
-        "TMIx_L2_WR_MSG_OUT",
-        "TMIx_L2_ANY_LOOKUP",
-        "TMIx_L2_READ_LOOKUP",
-        "TMIx_L2_WRITE_LOOKUP",
-        "TMIx_L2_EXT_SNOOP_LOOKUP",
-        "TMIx_L2_EXT_READ",
-        "TMIx_L2_EXT_READ_NOSNP",
-        "TMIx_L2_EXT_READ_UNIQUE",
-        "TMIx_L2_EXT_READ_BEATS",
-        "TMIx_L2_EXT_AR_STALL",
-        "TMIx_L2_EXT_AR_CNT_Q1",
-        "TMIx_L2_EXT_AR_CNT_Q2",
-        "TMIx_L2_EXT_AR_CNT_Q3",
-        "TMIx_L2_EXT_RRESP_0_127",
-        "TMIx_L2_EXT_RRESP_128_191",
-        "TMIx_L2_EXT_RRESP_192_255",
-        "TMIx_L2_EXT_RRESP_256_319",
-        "TMIx_L2_EXT_RRESP_320_383",
-        "TMIx_L2_EXT_WRITE",
-        "TMIx_L2_EXT_WRITE_NOSNP_FULL",
-        "TMIx_L2_EXT_WRITE_NOSNP_PTL",
-        "TMIx_L2_EXT_WRITE_SNP_FULL",
-        "TMIx_L2_EXT_WRITE_SNP_PTL",
-        "TMIx_L2_EXT_WRITE_BEATS",
-        "TMIx_L2_EXT_W_STALL",
-        "TMIx_L2_EXT_AW_CNT_Q1",
-        "TMIx_L2_EXT_AW_CNT_Q2",
-        "TMIx_L2_EXT_AW_CNT_Q3",
-        "TMIx_L2_EXT_SNOOP",
-        "TMIx_L2_EXT_SNOOP_STALL",
-        "TMIx_L2_EXT_SNOOP_RESP_CLEAN",
-        "TMIx_L2_EXT_SNOOP_RESP_DATA",
-        "TMIx_L2_EXT_SNOOP_INTERNAL",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-    };
+    /* Performance counters for the Memory System */
+    "",
+    "",
+    "",
+    "",
+    "TMIx_MMU_REQUESTS",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "TMIx_L2_RD_MSG_IN",
+    "TMIx_L2_RD_MSG_IN_STALL",
+    "TMIx_L2_WR_MSG_IN",
+    "TMIx_L2_WR_MSG_IN_STALL",
+    "TMIx_L2_SNP_MSG_IN",
+    "TMIx_L2_SNP_MSG_IN_STALL",
+    "TMIx_L2_RD_MSG_OUT",
+    "TMIx_L2_RD_MSG_OUT_STALL",
+    "TMIx_L2_WR_MSG_OUT",
+    "TMIx_L2_ANY_LOOKUP",
+    "TMIx_L2_READ_LOOKUP",
+    "TMIx_L2_WRITE_LOOKUP",
+    "TMIx_L2_EXT_SNOOP_LOOKUP",
+    "TMIx_L2_EXT_READ",
+    "TMIx_L2_EXT_READ_NOSNP",
+    "TMIx_L2_EXT_READ_UNIQUE",
+    "TMIx_L2_EXT_READ_BEATS",
+    "TMIx_L2_EXT_AR_STALL",
+    "TMIx_L2_EXT_AR_CNT_Q1",
+    "TMIx_L2_EXT_AR_CNT_Q2",
+    "TMIx_L2_EXT_AR_CNT_Q3",
+    "TMIx_L2_EXT_RRESP_0_127",
+    "TMIx_L2_EXT_RRESP_128_191",
+    "TMIx_L2_EXT_RRESP_192_255",
+    "TMIx_L2_EXT_RRESP_256_319",
+    "TMIx_L2_EXT_RRESP_320_383",
+    "TMIx_L2_EXT_WRITE",
+    "TMIx_L2_EXT_WRITE_NOSNP_FULL",
+    "TMIx_L2_EXT_WRITE_NOSNP_PTL",
+    "TMIx_L2_EXT_WRITE_SNP_FULL",
+    "TMIx_L2_EXT_WRITE_SNP_PTL",
+    "TMIx_L2_EXT_WRITE_BEATS",
+    "TMIx_L2_EXT_W_STALL",
+    "TMIx_L2_EXT_AW_CNT_Q1",
+    "TMIx_L2_EXT_AW_CNT_Q2",
+    "TMIx_L2_EXT_AW_CNT_Q3",
+    "TMIx_L2_EXT_SNOOP",
+    "TMIx_L2_EXT_SNOOP_STALL",
+    "TMIx_L2_EXT_SNOOP_RESP_CLEAN",
+    "TMIx_L2_EXT_SNOOP_RESP_DATA",
+    "TMIx_L2_EXT_SNOOP_INTERNAL",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+};
 
-    static const char * const hardware_counters_mali_tSIx[] = {
-        /* Performance counters for the Job Manager */
-        "",
-        "",
-        "",
-        "",
-        "TSIx_MESSAGES_SENT",
-        "TSIx_MESSAGES_RECEIVED",
-        "TSIx_GPU_ACTIVE",
-        "TSIx_IRQ_ACTIVE",
-        "TSIx_JS0_JOBS",
-        "TSIx_JS0_TASKS",
-        "TSIx_JS0_ACTIVE",
-        "",
-        "TSIx_JS0_WAIT_READ",
-        "TSIx_JS0_WAIT_ISSUE",
-        "TSIx_JS0_WAIT_DEPEND",
-        "TSIx_JS0_WAIT_FINISH",
-        "TSIx_JS1_JOBS",
-        "TSIx_JS1_TASKS",
-        "TSIx_JS1_ACTIVE",
-        "",
-        "TSIx_JS1_WAIT_READ",
-        "TSIx_JS1_WAIT_ISSUE",
-        "TSIx_JS1_WAIT_DEPEND",
-        "TSIx_JS1_WAIT_FINISH",
-        "TSIx_JS2_JOBS",
-        "TSIx_JS2_TASKS",
-        "TSIx_JS2_ACTIVE",
-        "",
-        "TSIx_JS2_WAIT_READ",
-        "TSIx_JS2_WAIT_ISSUE",
-        "TSIx_JS2_WAIT_DEPEND",
-        "TSIx_JS2_WAIT_FINISH",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
+static const char *const hardware_counters_mali_tSIx[] =
+{
+    /* Performance counters for the Job Manager */
+    "",
+    "",
+    "",
+    "",
+    "TSIx_MESSAGES_SENT",
+    "TSIx_MESSAGES_RECEIVED",
+    "TSIx_GPU_ACTIVE",
+    "TSIx_IRQ_ACTIVE",
+    "TSIx_JS0_JOBS",
+    "TSIx_JS0_TASKS",
+    "TSIx_JS0_ACTIVE",
+    "",
+    "TSIx_JS0_WAIT_READ",
+    "TSIx_JS0_WAIT_ISSUE",
+    "TSIx_JS0_WAIT_DEPEND",
+    "TSIx_JS0_WAIT_FINISH",
+    "TSIx_JS1_JOBS",
+    "TSIx_JS1_TASKS",
+    "TSIx_JS1_ACTIVE",
+    "",
+    "TSIx_JS1_WAIT_READ",
+    "TSIx_JS1_WAIT_ISSUE",
+    "TSIx_JS1_WAIT_DEPEND",
+    "TSIx_JS1_WAIT_FINISH",
+    "TSIx_JS2_JOBS",
+    "TSIx_JS2_TASKS",
+    "TSIx_JS2_ACTIVE",
+    "",
+    "TSIx_JS2_WAIT_READ",
+    "TSIx_JS2_WAIT_ISSUE",
+    "TSIx_JS2_WAIT_DEPEND",
+    "TSIx_JS2_WAIT_FINISH",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
 
-        /* Performance counters for the Tiler */
-        "",
-        "",
-        "",
-        "",
-        "TSIx_TILER_ACTIVE",
-        "TSIx_JOBS_PROCESSED",
-        "TSIx_TRIANGLES",
-        "TSIx_LINES",
-        "TSIx_POINTS",
-        "TSIx_FRONT_FACING",
-        "TSIx_BACK_FACING",
-        "TSIx_PRIM_VISIBLE",
-        "TSIx_PRIM_CULLED",
-        "TSIx_PRIM_CLIPPED",
-        "TSIx_PRIM_SAT_CULLED",
-        "",
-        "",
-        "TSIx_BUS_READ",
-        "",
-        "TSIx_BUS_WRITE",
-        "TSIx_LOADING_DESC",
-        "",
-        "",
-        "",
-        "",
-        "TSIx_PREFETCH_STALL",
-        "TSIx_VCACHE_HIT",
-        "TSIx_VCACHE_MISS",
-        "TSIx_VCACHE_LINE_WAIT",
-        "TSIx_VFETCH_POS_READ_WAIT",
-        "TSIx_VFETCH_VERTEX_WAIT",
-        "TSIx_VFETCH_STALL",
-        "TSIx_PRIMASSY_STALL",
-        "TSIx_BBOX_GEN_STALL",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "TSIx_BINNER_STALL",
-        "TSIx_ITER_STALL",
-        "TSIx_COMPRESS_MISS",
-        "TSIx_COMPRESS_STALL",
-        "TSIx_PCACHE_HIT",
-        "TSIx_PCACHE_MISS",
-        "TSIx_PCACHE_MISS_STALL",
-        "TSIx_PCACHE_EVICT_STALL",
-        "TSIx_PMGR_PTR_WR_STALL",
-        "TSIx_PMGR_PTR_RD_STALL",
-        "TSIx_PMGR_CMD_WR_STALL",
-        "TSIx_WRBUF_ACTIVE",
-        "TSIx_WRBUF_HIT",
-        "TSIx_WRBUF_MISS",
-        "TSIx_WRBUF_NO_FREE_LINE_STALL",
-        "TSIx_WRBUF_NO_AXI_ID_STALL",
-        "TSIx_WRBUF_AXI_STALL",
-        "",
-        "",
-        "",
-        "TSIx_UTLB_TRANS",
-        "TSIx_UTLB_TRANS_HIT",
-        "TSIx_UTLB_TRANS_STALL",
-        "TSIx_UTLB_TRANS_MISS_DELAY",
-        "TSIx_UTLB_MMU_REQ",
+    /* Performance counters for the Tiler */
+    "",
+    "",
+    "",
+    "",
+    "TSIx_TILER_ACTIVE",
+    "TSIx_JOBS_PROCESSED",
+    "TSIx_TRIANGLES",
+    "TSIx_LINES",
+    "TSIx_POINTS",
+    "TSIx_FRONT_FACING",
+    "TSIx_BACK_FACING",
+    "TSIx_PRIM_VISIBLE",
+    "TSIx_PRIM_CULLED",
+    "TSIx_PRIM_CLIPPED",
+    "TSIx_PRIM_SAT_CULLED",
+    "",
+    "",
+    "TSIx_BUS_READ",
+    "",
+    "TSIx_BUS_WRITE",
+    "TSIx_LOADING_DESC",
+    "",
+    "",
+    "",
+    "",
+    "TSIx_PREFETCH_STALL",
+    "TSIx_VCACHE_HIT",
+    "TSIx_VCACHE_MISS",
+    "TSIx_VCACHE_LINE_WAIT",
+    "TSIx_VFETCH_POS_READ_WAIT",
+    "TSIx_VFETCH_VERTEX_WAIT",
+    "TSIx_VFETCH_STALL",
+    "TSIx_PRIMASSY_STALL",
+    "TSIx_BBOX_GEN_STALL",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "TSIx_BINNER_STALL",
+    "TSIx_ITER_STALL",
+    "TSIx_COMPRESS_MISS",
+    "TSIx_COMPRESS_STALL",
+    "TSIx_PCACHE_HIT",
+    "TSIx_PCACHE_MISS",
+    "TSIx_PCACHE_MISS_STALL",
+    "TSIx_PCACHE_EVICT_STALL",
+    "TSIx_PMGR_PTR_WR_STALL",
+    "TSIx_PMGR_PTR_RD_STALL",
+    "TSIx_PMGR_CMD_WR_STALL",
+    "TSIx_WRBUF_ACTIVE",
+    "TSIx_WRBUF_HIT",
+    "TSIx_WRBUF_MISS",
+    "TSIx_WRBUF_NO_FREE_LINE_STALL",
+    "TSIx_WRBUF_NO_AXI_ID_STALL",
+    "TSIx_WRBUF_AXI_STALL",
+    "",
+    "",
+    "",
+    "TSIx_UTLB_TRANS",
+    "TSIx_UTLB_TRANS_HIT",
+    "TSIx_UTLB_TRANS_STALL",
+    "TSIx_UTLB_TRANS_MISS_DELAY",
+    "TSIx_UTLB_MMU_REQ",
 
-        /* Performance counters for the Shader Core */
-        "",
-        "",
-        "",
-        "",
-        "TSIx_FRAG_ACTIVE",
-        "TSIx_FRAG_PRIMITIVES",
-        "TSIx_FRAG_PRIM_RAST",
-        "TSIx_FRAG_FPK_ACTIVE",
-        "TSIx_FRAG_STARVING",
-        "TSIx_FRAG_WARPS",
-        "TSIx_FRAG_PARTIAL_WARPS",
-        "TSIx_FRAG_QUADS_RAST",
-        "TSIx_FRAG_QUADS_EZS_TEST",
-        "TSIx_FRAG_QUADS_EZS_UPDATE",
-        "TSIx_FRAG_QUADS_EZS_KILL",
-        "TSIx_FRAG_LZS_TEST",
-        "TSIx_FRAG_LZS_KILL",
-        "",
-        "TSIx_FRAG_PTILES",
-        "TSIx_FRAG_TRANS_ELIM",
-        "TSIx_QUAD_FPK_KILLER",
-        "",
-        "TSIx_COMPUTE_ACTIVE",
-        "TSIx_COMPUTE_TASKS",
-        "TSIx_COMPUTE_WARPS",
-        "TSIx_COMPUTE_STARVING",
-        "TSIx_EXEC_CORE_ACTIVE",
-        "TSIx_EXEC_ACTIVE",
-        "TSIx_EXEC_INSTR_COUNT",
-        "TSIx_EXEC_INSTR_DIVERGED",
-        "TSIx_EXEC_INSTR_STARVING",
-        "TSIx_ARITH_INSTR_SINGLE_FMA",
-        "TSIx_ARITH_INSTR_DOUBLE",
-        "TSIx_ARITH_INSTR_MSG",
-        "TSIx_ARITH_INSTR_MSG_ONLY",
-        "TSIx_TEX_INSTR",
-        "TSIx_TEX_INSTR_MIPMAP",
-        "TSIx_TEX_INSTR_COMPRESSED",
-        "TSIx_TEX_INSTR_3D",
-        "TSIx_TEX_INSTR_TRILINEAR",
-        "TSIx_TEX_COORD_ISSUE",
-        "TSIx_TEX_COORD_STALL",
-        "TSIx_TEX_STARVE_CACHE",
-        "TSIx_TEX_STARVE_FILTER",
-        "TSIx_LS_MEM_READ_FULL",
-        "TSIx_LS_MEM_READ_SHORT",
-        "TSIx_LS_MEM_WRITE_FULL",
-        "TSIx_LS_MEM_WRITE_SHORT",
-        "TSIx_LS_MEM_ATOMIC",
-        "TSIx_VARY_INSTR",
-        "TSIx_VARY_SLOT_32",
-        "TSIx_VARY_SLOT_16",
-        "TSIx_ATTR_INSTR",
-        "TSIx_ARITH_INSTR_FP_MUL",
-        "TSIx_BEATS_RD_FTC",
-        "TSIx_BEATS_RD_FTC_EXT",
-        "TSIx_BEATS_RD_LSC",
-        "TSIx_BEATS_RD_LSC_EXT",
-        "TSIx_BEATS_RD_TEX",
-        "TSIx_BEATS_RD_TEX_EXT",
-        "TSIx_BEATS_RD_OTHER",
-        "TSIx_BEATS_WR_LSC",
-        "TSIx_BEATS_WR_TIB",
-        "",
+    /* Performance counters for the Shader Core */
+    "",
+    "",
+    "",
+    "",
+    "TSIx_FRAG_ACTIVE",
+    "TSIx_FRAG_PRIMITIVES",
+    "TSIx_FRAG_PRIM_RAST",
+    "TSIx_FRAG_FPK_ACTIVE",
+    "TSIx_FRAG_STARVING",
+    "TSIx_FRAG_WARPS",
+    "TSIx_FRAG_PARTIAL_WARPS",
+    "TSIx_FRAG_QUADS_RAST",
+    "TSIx_FRAG_QUADS_EZS_TEST",
+    "TSIx_FRAG_QUADS_EZS_UPDATE",
+    "TSIx_FRAG_QUADS_EZS_KILL",
+    "TSIx_FRAG_LZS_TEST",
+    "TSIx_FRAG_LZS_KILL",
+    "",
+    "TSIx_FRAG_PTILES",
+    "TSIx_FRAG_TRANS_ELIM",
+    "TSIx_QUAD_FPK_KILLER",
+    "",
+    "TSIx_COMPUTE_ACTIVE",
+    "TSIx_COMPUTE_TASKS",
+    "TSIx_COMPUTE_WARPS",
+    "TSIx_COMPUTE_STARVING",
+    "TSIx_EXEC_CORE_ACTIVE",
+    "TSIx_EXEC_ACTIVE",
+    "TSIx_EXEC_INSTR_COUNT",
+    "TSIx_EXEC_INSTR_DIVERGED",
+    "TSIx_EXEC_INSTR_STARVING",
+    "TSIx_ARITH_INSTR_SINGLE_FMA",
+    "TSIx_ARITH_INSTR_DOUBLE",
+    "TSIx_ARITH_INSTR_MSG",
+    "TSIx_ARITH_INSTR_MSG_ONLY",
+    "TSIx_TEX_INSTR",
+    "TSIx_TEX_INSTR_MIPMAP",
+    "TSIx_TEX_INSTR_COMPRESSED",
+    "TSIx_TEX_INSTR_3D",
+    "TSIx_TEX_INSTR_TRILINEAR",
+    "TSIx_TEX_COORD_ISSUE",
+    "TSIx_TEX_COORD_STALL",
+    "TSIx_TEX_STARVE_CACHE",
+    "TSIx_TEX_STARVE_FILTER",
+    "TSIx_LS_MEM_READ_FULL",
+    "TSIx_LS_MEM_READ_SHORT",
+    "TSIx_LS_MEM_WRITE_FULL",
+    "TSIx_LS_MEM_WRITE_SHORT",
+    "TSIx_LS_MEM_ATOMIC",
+    "TSIx_VARY_INSTR",
+    "TSIx_VARY_SLOT_32",
+    "TSIx_VARY_SLOT_16",
+    "TSIx_ATTR_INSTR",
+    "TSIx_ARITH_INSTR_FP_MUL",
+    "TSIx_BEATS_RD_FTC",
+    "TSIx_BEATS_RD_FTC_EXT",
+    "TSIx_BEATS_RD_LSC",
+    "TSIx_BEATS_RD_LSC_EXT",
+    "TSIx_BEATS_RD_TEX",
+    "TSIx_BEATS_RD_TEX_EXT",
+    "TSIx_BEATS_RD_OTHER",
+    "TSIx_BEATS_WR_LSC",
+    "TSIx_BEATS_WR_TIB",
+    "",
 
-        /* Performance counters for the Memory System */
-        "",
-        "",
-        "",
-        "",
-        "TSIx_MMU_REQUESTS",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "TSIx_L2_RD_MSG_IN",
-        "TSIx_L2_RD_MSG_IN_STALL",
-        "TSIx_L2_WR_MSG_IN",
-        "TSIx_L2_WR_MSG_IN_STALL",
-        "TSIx_L2_SNP_MSG_IN",
-        "TSIx_L2_SNP_MSG_IN_STALL",
-        "TSIx_L2_RD_MSG_OUT",
-        "TSIx_L2_RD_MSG_OUT_STALL",
-        "TSIx_L2_WR_MSG_OUT",
-        "TSIx_L2_ANY_LOOKUP",
-        "TSIx_L2_READ_LOOKUP",
-        "TSIx_L2_WRITE_LOOKUP",
-        "TSIx_L2_EXT_SNOOP_LOOKUP",
-        "TSIx_L2_EXT_READ",
-        "TSIx_L2_EXT_READ_NOSNP",
-        "TSIx_L2_EXT_READ_UNIQUE",
-        "TSIx_L2_EXT_READ_BEATS",
-        "TSIx_L2_EXT_AR_STALL",
-        "TSIx_L2_EXT_AR_CNT_Q1",
-        "TSIx_L2_EXT_AR_CNT_Q2",
-        "TSIx_L2_EXT_AR_CNT_Q3",
-        "TSIx_L2_EXT_RRESP_0_127",
-        "TSIx_L2_EXT_RRESP_128_191",
-        "TSIx_L2_EXT_RRESP_192_255",
-        "TSIx_L2_EXT_RRESP_256_319",
-        "TSIx_L2_EXT_RRESP_320_383",
-        "TSIx_L2_EXT_WRITE",
-        "TSIx_L2_EXT_WRITE_NOSNP_FULL",
-        "TSIx_L2_EXT_WRITE_NOSNP_PTL",
-        "TSIx_L2_EXT_WRITE_SNP_FULL",
-        "TSIx_L2_EXT_WRITE_SNP_PTL",
-        "TSIx_L2_EXT_WRITE_BEATS",
-        "TSIx_L2_EXT_W_STALL",
-        "TSIx_L2_EXT_AW_CNT_Q1",
-        "TSIx_L2_EXT_AW_CNT_Q2",
-        "TSIx_L2_EXT_AW_CNT_Q3",
-        "TSIx_L2_EXT_SNOOP",
-        "TSIx_L2_EXT_SNOOP_STALL",
-        "TSIx_L2_EXT_SNOOP_RESP_CLEAN",
-        "TSIx_L2_EXT_SNOOP_RESP_DATA",
-        "TSIx_L2_EXT_SNOOP_INTERNAL",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-        "",
-    };
+    /* Performance counters for the Memory System */
+    "",
+    "",
+    "",
+    "",
+    "TSIx_MMU_REQUESTS",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "TSIx_L2_RD_MSG_IN",
+    "TSIx_L2_RD_MSG_IN_STALL",
+    "TSIx_L2_WR_MSG_IN",
+    "TSIx_L2_WR_MSG_IN_STALL",
+    "TSIx_L2_SNP_MSG_IN",
+    "TSIx_L2_SNP_MSG_IN_STALL",
+    "TSIx_L2_RD_MSG_OUT",
+    "TSIx_L2_RD_MSG_OUT_STALL",
+    "TSIx_L2_WR_MSG_OUT",
+    "TSIx_L2_ANY_LOOKUP",
+    "TSIx_L2_READ_LOOKUP",
+    "TSIx_L2_WRITE_LOOKUP",
+    "TSIx_L2_EXT_SNOOP_LOOKUP",
+    "TSIx_L2_EXT_READ",
+    "TSIx_L2_EXT_READ_NOSNP",
+    "TSIx_L2_EXT_READ_UNIQUE",
+    "TSIx_L2_EXT_READ_BEATS",
+    "TSIx_L2_EXT_AR_STALL",
+    "TSIx_L2_EXT_AR_CNT_Q1",
+    "TSIx_L2_EXT_AR_CNT_Q2",
+    "TSIx_L2_EXT_AR_CNT_Q3",
+    "TSIx_L2_EXT_RRESP_0_127",
+    "TSIx_L2_EXT_RRESP_128_191",
+    "TSIx_L2_EXT_RRESP_192_255",
+    "TSIx_L2_EXT_RRESP_256_319",
+    "TSIx_L2_EXT_RRESP_320_383",
+    "TSIx_L2_EXT_WRITE",
+    "TSIx_L2_EXT_WRITE_NOSNP_FULL",
+    "TSIx_L2_EXT_WRITE_NOSNP_PTL",
+    "TSIx_L2_EXT_WRITE_SNP_FULL",
+    "TSIx_L2_EXT_WRITE_SNP_PTL",
+    "TSIx_L2_EXT_WRITE_BEATS",
+    "TSIx_L2_EXT_W_STALL",
+    "TSIx_L2_EXT_AW_CNT_Q1",
+    "TSIx_L2_EXT_AW_CNT_Q2",
+    "TSIx_L2_EXT_AW_CNT_Q3",
+    "TSIx_L2_EXT_SNOOP",
+    "TSIx_L2_EXT_SNOOP_STALL",
+    "TSIx_L2_EXT_SNOOP_RESP_CLEAN",
+    "TSIx_L2_EXT_SNOOP_RESP_DATA",
+    "TSIx_L2_EXT_SNOOP_INTERNAL",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+    "",
+};
 
-	enum {
-		/* product id masks for old and new versions of the id field. NB: the T60x must be tested before anything else as it could exceptionally be
-		 * treated as a new style of id with produce code 0x6006 */
-		PRODUCT_ID_MASK_OLD = 0xffff,
-		PRODUCT_ID_MASK_NEW = 0xf00f,
-		/* Old style product ids */
-		PRODUCT_ID_T60X = 0x6956,
-		PRODUCT_ID_T62X = 0x0620,
-		PRODUCT_ID_T72X = 0x0720,
-		PRODUCT_ID_T76X = 0x0750,
-		PRODUCT_ID_T82X = 0x0820,
-		PRODUCT_ID_T83X = 0x0830,
-		PRODUCT_ID_T86X = 0x0860,
-		PRODUCT_ID_TFRX = 0x0880,
-		/* New style product ids */
-		PRODUCT_ID_TMIX = 0x6000,
-		PRODUCT_ID_THEX = 0x6001,
-		PRODUCT_ID_TSIX = 0x7000
-	};
+enum
+{
+    /* Product id masks for old and new versions of the id field. NB: the T60x must be tested before anything else as it could exceptionally be
+     * treated as a new style of id with product code 0x6006 (PRODUCT_ID_T60X & PRODUCT_ID_MASK_NEW == 0x6006). */
+    PRODUCT_ID_MASK_OLD = 0xffff,
+    PRODUCT_ID_MASK_NEW = 0xf00f,
+    /* Old style product ids */
+    PRODUCT_ID_T60X = 0x6956,
+    PRODUCT_ID_T62X = 0x0620,
+    PRODUCT_ID_T72X = 0x0720,
+    PRODUCT_ID_T76X = 0x0750,
+    PRODUCT_ID_T82X = 0x0820,
+    PRODUCT_ID_T83X = 0x0830,
+    PRODUCT_ID_T86X = 0x0860,
+    PRODUCT_ID_TFRX = 0x0880,
+    /* New style product ids */
+    PRODUCT_ID_TMIX = 0x6000,
+    PRODUCT_ID_THEX = 0x6001,
+    PRODUCT_ID_TSIX = 0x7000
+};
 
-	struct CounterMapping
-	{
-		uint32_t product_mask;
-		uint32_t product_id;
-		const char * const *names_lut;
-	};
+struct CounterMapping /* Ties a product id and its id mask to the counter-name table used for that product */
+{
+    uint32_t           product_mask; /**< PRODUCT_ID_MASK_OLD or PRODUCT_ID_MASK_NEW; NOTE(review): presumably ANDed with the GPU id before comparing with product_id - confirm in the lookup code */
+    uint32_t           product_id;   /**< One of the PRODUCT_ID_* values */
+    const char *const *names_lut;    /**< Pointer to the first entry of a hardware_counters_mali_* name array */
+};
 
-	static const CounterMapping products[] = {
-		{ PRODUCT_ID_MASK_OLD, PRODUCT_ID_T60X, hardware_counters_mali_t60x, },
-		{ PRODUCT_ID_MASK_OLD, PRODUCT_ID_T62X, hardware_counters_mali_t62x, },
-		{ PRODUCT_ID_MASK_OLD, PRODUCT_ID_T72X, hardware_counters_mali_t72x, },
-		{ PRODUCT_ID_MASK_OLD, PRODUCT_ID_T76X, hardware_counters_mali_t76x, },
-		{ PRODUCT_ID_MASK_OLD, PRODUCT_ID_T82X, hardware_counters_mali_t82x, },
-		{ PRODUCT_ID_MASK_OLD, PRODUCT_ID_T83X, hardware_counters_mali_t83x, },
-		{ PRODUCT_ID_MASK_OLD, PRODUCT_ID_T86X, hardware_counters_mali_t86x, },
-		{ PRODUCT_ID_MASK_OLD, PRODUCT_ID_TFRX, hardware_counters_mali_t88x, },
-		{ PRODUCT_ID_MASK_NEW, PRODUCT_ID_TMIX, hardware_counters_mali_tMIx, },
-		{ PRODUCT_ID_MASK_NEW, PRODUCT_ID_THEX, hardware_counters_mali_tHEx, },
-		{ PRODUCT_ID_MASK_NEW, PRODUCT_ID_TSIX, hardware_counters_mali_tSIx, },
-	};
+static const CounterMapping products[] =
+{ /* One entry per known product; old-style ids are paired with PRODUCT_ID_MASK_OLD, new-style ids with PRODUCT_ID_MASK_NEW */
+    { /* T60X is listed first; NOTE(review): presumably the lookup scans in order so it is tested before the new-style ids - confirm */
+        PRODUCT_ID_MASK_OLD, PRODUCT_ID_T60X, hardware_counters_mali_t60x,
+    },
+    {
+        PRODUCT_ID_MASK_OLD, PRODUCT_ID_T62X, hardware_counters_mali_t62x,
+    },
+    {
+        PRODUCT_ID_MASK_OLD, PRODUCT_ID_T72X, hardware_counters_mali_t72x,
+    },
+    {
+        PRODUCT_ID_MASK_OLD, PRODUCT_ID_T76X, hardware_counters_mali_t76x,
+    },
+    {
+        PRODUCT_ID_MASK_OLD, PRODUCT_ID_T82X, hardware_counters_mali_t82x,
+    },
+    {
+        PRODUCT_ID_MASK_OLD, PRODUCT_ID_T83X, hardware_counters_mali_t83x,
+    },
+    {
+        PRODUCT_ID_MASK_OLD, PRODUCT_ID_T86X, hardware_counters_mali_t86x,
+    },
+    { /* The TFRX product id uses the t88x counter-name table */
+        PRODUCT_ID_MASK_OLD, PRODUCT_ID_TFRX, hardware_counters_mali_t88x,
+    },
+    {
+        PRODUCT_ID_MASK_NEW, PRODUCT_ID_TMIX, hardware_counters_mali_tMIx,
+    },
+    {
+        PRODUCT_ID_MASK_NEW, PRODUCT_ID_THEX, hardware_counters_mali_tHEx,
+    },
+    {
+        PRODUCT_ID_MASK_NEW, PRODUCT_ID_TSIX, hardware_counters_mali_tSIx,
+    },
+};
 
-	enum { NUM_PRODUCTS = sizeof(products) / sizeof(products[0]) };
+enum
+{
+    NUM_PRODUCTS = sizeof(products) / sizeof(products[0]) /* Number of entries in the products table, computed at compile time */
+};
 } // namespace mali_userspace
 #endif /* ARM_COMPUTE_TEST_HWC_NAMES */
diff --git a/tests/main.cpp b/tests/main.cpp
index c62de66..1f3c365 100644
--- a/tests/main.cpp
+++ b/tests/main.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,6 +30,7 @@
 #include "tests/framework/Profiler.h"
 #include "tests/framework/command_line/CommandLineOptions.h"
 #include "tests/framework/command_line/CommandLineParser.h"
+#include "tests/framework/command_line/CommonOptions.h"
 #include "tests/framework/instruments/Instruments.h"
 #include "tests/framework/printers/Printers.h"
 
@@ -73,17 +74,6 @@
 
     framework::CommandLineParser parser;
 
-    std::set<framework::InstrumentsDescription> allowed_instruments
-    {
-        std::pair<framework::InstrumentType, framework::ScaleFactor>(framework::InstrumentType::ALL, framework::ScaleFactor::NONE),
-        std::pair<framework::InstrumentType, framework::ScaleFactor>(framework::InstrumentType::NONE, framework::ScaleFactor::NONE),
-    };
-
-    for(const auto &type : framework.available_instruments())
-    {
-        allowed_instruments.insert(type);
-    }
-
     std::set<framework::DatasetMode> allowed_modes
     {
         framework::DatasetMode::PRECOMMIT,
@@ -91,52 +81,18 @@
         framework::DatasetMode::ALL
     };
 
-    std::set<framework::LogFormat> supported_log_formats
-    {
-        framework::LogFormat::NONE,
-        framework::LogFormat::PRETTY,
-        framework::LogFormat::JSON,
-    };
+    framework::CommonOptions options(parser);
 
-    std::set<framework::LogLevel> supported_log_levels
-    {
-        framework::LogLevel::NONE,
-        framework::LogLevel::CONFIG,
-        framework::LogLevel::TESTS,
-        framework::LogLevel::ERRORS,
-        framework::LogLevel::DEBUG,
-        framework::LogLevel::MEASUREMENTS,
-        framework::LogLevel::ALL,
-    };
-
-    auto help = parser.add_option<framework::ToggleOption>("help");
-    help->set_help("Show this help message");
     auto dataset_mode = parser.add_option<framework::EnumOption<framework::DatasetMode>>("mode", allowed_modes, framework::DatasetMode::PRECOMMIT);
     dataset_mode->set_help("For managed datasets select which group to use");
-    auto instruments = parser.add_option<framework::EnumListOption<framework::InstrumentsDescription>>("instruments", allowed_instruments, std::initializer_list<framework::InstrumentsDescription> { std::pair<framework::InstrumentType, framework::ScaleFactor>(framework::InstrumentType::WALL_CLOCK_TIMER, framework::ScaleFactor::NONE) });
-    instruments->set_help("Set the profiling instruments to use");
-    auto iterations = parser.add_option<framework::SimpleOption<int>>("iterations", 1);
-    iterations->set_help("Number of iterations per test case");
-    auto threads = parser.add_option<framework::SimpleOption<int>>("threads", 1);
-    threads->set_help("Number of threads to use");
-    auto log_format = parser.add_option<framework::EnumOption<framework::LogFormat>>("log-format", supported_log_formats, framework::LogFormat::PRETTY);
-    log_format->set_help("Output format for measurements and failures (affects only log-file)");
     auto filter = parser.add_option<framework::SimpleOption<std::string>>("filter", ".*");
     filter->set_help("Regular expression to select test cases");
     auto filter_id = parser.add_option<framework::SimpleOption<std::string>>("filter-id");
     filter_id->set_help("List of test ids. ... can be used to define a range.");
-    auto log_file = parser.add_option<framework::SimpleOption<std::string>>("log-file");
-    log_file->set_help("Write output to file instead of to the console (affected by log-format)");
-    auto log_level = parser.add_option<framework::EnumOption<framework::LogLevel>>("log-level", supported_log_levels, framework::LogLevel::ALL);
-    log_level->set_help("Verbosity of the output");
-    auto throw_errors = parser.add_option<framework::ToggleOption>("throw-errors");
-    throw_errors->set_help("Don't catch fatal errors (useful for debugging)");
     auto stop_on_error = parser.add_option<framework::ToggleOption>("stop-on-error");
     stop_on_error->set_help("Abort execution after the first failed test (useful for debugging)");
     auto seed = parser.add_option<framework::SimpleOption<std::random_device::result_type>>("seed", std::random_device()());
     seed->set_help("Global seed for random number generation");
-    auto color_output = parser.add_option<framework::ToggleOption>("color-output", true);
-    color_output->set_help("Produce colored output on the console");
     auto list_tests = parser.add_option<framework::ToggleOption>("list-tests", false);
     list_tests->set_help("List all test names");
     auto test_instruments = parser.add_option<framework::ToggleOption>("test-instruments", false);
@@ -145,81 +101,22 @@
     error_on_missing_assets->set_help("Mark a test as failed instead of skipping it when assets are missing");
     auto assets = parser.add_positional_option<framework::SimpleOption<std::string>>("assets");
     assets->set_help("Path to the assets directory");
-    auto pretty_console = parser.add_option<framework::ToggleOption>("pretty-console", false);
-    pretty_console->set_help("Produce pretty output on the console");
-    auto json_file = parser.add_option<framework::SimpleOption<std::string>>("json-file");
-    json_file->set_help("Write output to a json file.");
-    auto pretty_file = parser.add_option<framework::SimpleOption<std::string>>("pretty-file");
-    pretty_file->set_help("Write output to a text file");
 
     try
     {
         parser.parse(argc, argv);
 
-        if(help->is_set() && help->value())
+        if(options.help->is_set() && options.help->value())
         {
             parser.print_help(argv[0]);
             return 0;
         }
 
-        std::vector<std::unique_ptr<framework::Printer>> printers;
-        std::vector<std::shared_ptr<std::ofstream>>      log_streams;
+        std::vector<std::unique_ptr<framework::Printer>> printers = options.create_printers();
 
-        if(pretty_console->value() && (log_file->is_set() || log_format->value() != framework::LogFormat::PRETTY))
-        {
-            auto pretty_printer = support::cpp14::make_unique<framework::PrettyPrinter>();
-            pretty_printer->set_color_output(color_output->value());
-            printers.push_back(std::move(pretty_printer));
-        }
+        Scheduler::get().set_num_threads(options.threads->value());
 
-        std::unique_ptr<framework::Printer> printer;
-        switch(log_format->value())
-        {
-            case framework::LogFormat::JSON:
-                printer = support::cpp14::make_unique<framework::JSONPrinter>();
-                break;
-            case framework::LogFormat::NONE:
-                break;
-            case framework::LogFormat::PRETTY:
-            default:
-                auto pretty_printer = support::cpp14::make_unique<framework::PrettyPrinter>();
-                // Don't use colours if we print to a file:
-                pretty_printer->set_color_output((!log_file->is_set()) && color_output->value());
-                printer = std::move(pretty_printer);
-                break;
-        }
-
-        if(log_file->is_set())
-        {
-            log_streams.push_back(std::make_shared<std::ofstream>(log_file->value()));
-            if(printer != nullptr)
-            {
-                printer->set_stream(*log_streams.back().get());
-            }
-        }
-
-        if(printer != nullptr)
-        {
-            printers.push_back(std::move(printer));
-        }
-
-        if(json_file->is_set())
-        {
-            printers.push_back(support::cpp14::make_unique<framework::JSONPrinter>());
-            log_streams.push_back(std::make_shared<std::ofstream>(json_file->value()));
-            printers.back()->set_stream(*log_streams.back().get());
-        }
-
-        if(pretty_file->is_set())
-        {
-            printers.push_back(support::cpp14::make_unique<framework::PrettyPrinter>());
-            log_streams.push_back(std::make_shared<std::ofstream>(pretty_file->value()));
-            printers.back()->set_stream(*log_streams.back().get());
-        }
-
-        Scheduler::get().set_num_threads(threads->value());
-
-        if(log_level->value() > framework::LogLevel::NONE)
+        if(options.log_level->value() > framework::LogLevel::NONE)
         {
             for(auto &p : printers)
             {
@@ -227,13 +124,14 @@
             }
         }
 
-        if(log_level->value() >= framework::LogLevel::CONFIG)
+        if(options.log_level->value() >= framework::LogLevel::CONFIG)
         {
             for(auto &p : printers)
             {
+                p->print_entry("Version", build_information());
                 p->print_entry("Seed", support::cpp11::to_string(seed->value()));
-                p->print_entry("Iterations", support::cpp11::to_string(iterations->value()));
-                p->print_entry("Threads", support::cpp11::to_string(threads->value()));
+                p->print_entry("Iterations", support::cpp11::to_string(options.iterations->value()));
+                p->print_entry("Threads", support::cpp11::to_string(options.threads->value()));
                 {
                     using support::cpp11::to_string;
                     p->print_entry("Dataset mode", to_string(dataset_mode->value()));
@@ -241,12 +139,12 @@
             }
         }
 
-        framework.init(instruments->value(), iterations->value(), dataset_mode->value(), filter->value(), filter_id->value(), log_level->value());
+        framework.init(options.instruments->value(), options.iterations->value(), dataset_mode->value(), filter->value(), filter_id->value(), options.log_level->value());
         for(auto &p : printers)
         {
             framework.add_printer(p.get());
         }
-        framework.set_throw_errors(throw_errors->value());
+        framework.set_throw_errors(options.throw_errors->value());
         framework.set_stop_on_error(stop_on_error->value());
         framework.set_error_on_missing_assets(error_on_missing_assets->value());
 
@@ -285,7 +183,7 @@
 
         success = framework.run();
 
-        if(log_level->value() > framework::LogLevel::NONE)
+        if(options.log_level->value() > framework::LogLevel::NONE)
         {
             for(auto &p : printers)
             {
@@ -293,13 +191,17 @@
             }
         }
 
+#ifdef ARM_COMPUTE_CL
+        CLScheduler::get().sync();
+#endif /* ARM_COMPUTE_CL */
+
         return (success ? 0 : 1);
     }
     catch(const std::exception &error)
     {
         std::cerr << error.what() << "\n";
 
-        if(throw_errors->value())
+        if(options.throw_errors->value())
         {
             throw;
         }
diff --git a/tests/networks/AlexNetNetwork.h b/tests/networks/AlexNetNetwork.h
index 448cf31..819111f 100644
--- a/tests/networks/AlexNetNetwork.h
+++ b/tests/networks/AlexNetNetwork.h
@@ -564,6 +564,13 @@
         smx.run();
     }
 
+    /** Sync the results: calls sync_if_necessary and then sync_tensor_if_necessary on output, both instantiated for TensorType */
+    void sync()
+    {
+        sync_if_necessary<TensorType>();              /* NOTE(review): helper defined outside this hunk; presumably waits for queued backend work - confirm */
+        sync_tensor_if_necessary<TensorType>(output); /* NOTE(review): presumably makes the contents of output current for the caller - confirm */
+    }
+
 private:
     struct DirectConv
     {
diff --git a/tests/networks/LeNet5Network.h b/tests/networks/LeNet5Network.h
index f8aac30..a46489f 100644
--- a/tests/networks/LeNet5Network.h
+++ b/tests/networks/LeNet5Network.h
@@ -230,6 +230,13 @@
         smx.run();
     }
 
+    /** Sync the results: calls sync_if_necessary and then sync_tensor_if_necessary on output, both instantiated for TensorType */
+    void sync()
+    {
+        sync_if_necessary<TensorType>();              /* NOTE(review): helper defined outside this hunk; presumably waits for queued backend work - confirm */
+        sync_tensor_if_necessary<TensorType>(output); /* NOTE(review): presumably makes the contents of output current for the caller - confirm */
+    }
+
 private:
     unsigned int _batches{ 0 };
 
diff --git a/tests/networks/MobileNetNetwork.h b/tests/networks/MobileNetNetwork.h
index 1bc8ad9..8c3cb1f 100644
--- a/tests/networks/MobileNetNetwork.h
+++ b/tests/networks/MobileNetNetwork.h
@@ -251,6 +251,13 @@
         reshape.run();
     }
 
+    /** Sync the results: calls sync_if_necessary and then sync_tensor_if_necessary on output, both instantiated for TensorType */
+    void sync()
+    {
+        sync_if_necessary<TensorType>();              /* NOTE(review): helper defined outside this hunk; presumably waits for queued backend work - confirm */
+        sync_tensor_if_necessary<TensorType>(output); /* NOTE(review): presumably makes the contents of output current for the caller - confirm */
+    }
+
 private:
     void depthwise_conv_block_init(unsigned int idx, unsigned int ifm, unsigned int ofm)
     {
diff --git a/tests/networks/MobileNetV1Network.h b/tests/networks/MobileNetV1Network.h
index dbe3f49..0957c6b 100644
--- a/tests/networks/MobileNetV1Network.h
+++ b/tests/networks/MobileNetV1Network.h
@@ -300,6 +300,13 @@
         smx.run();
     }
 
+    /** Sync the results: calls sync_if_necessary and then sync_tensor_if_necessary on output, both instantiated for TensorType */
+    void sync()
+    {
+        sync_if_necessary<TensorType>();              /* NOTE(review): helper defined outside this hunk; presumably waits for queued backend work - confirm */
+        sync_tensor_if_necessary<TensorType>(output); /* NOTE(review): presumably makes the contents of output current for the caller - confirm */
+    }
+
 private:
     void depthwise_conv_block_init(unsigned int idx, unsigned int ifm, unsigned int ofm)
     {
diff --git a/tests/validation/CL/Convolution.cpp b/tests/validation/CL/Convolution.cpp
new file mode 100644
index 0000000..ccb0abc
--- /dev/null
+++ b/tests/validation/CL/Convolution.cpp
@@ -0,0 +1,447 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLConvolution.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/ConvolutionFixture.h"
+#include <vector>
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(CustomConvolution)
+TEST_SUITE(CustomConvolutionSquare)
+TEST_SUITE(CustomConvolution3x3)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::U8)),
+                                                                           datasets::BorderModes()),
+                                                                   framework::dataset::make("filter_size", { 3 })),
+               shape, data_type, border_mode, filter_size)
+{
+    // Create source and destination tensors with the same shape and data type
+    CLTensor src = create_tensor<CLTensor>(shape, data_type);
+    CLTensor dst = create_tensor<CLTensor>(shape, data_type);
+
+    // Create conv matrix; zero-initialised because configure() receives it and must not be handed indeterminate values
+    int16_t conv[9] = {};
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    CLConvolution3x3 convolution;
+    convolution.configure(&src, &dst, conv, 0, border_mode);
+
+    // Validate valid region: expected value is built from the shape, whether the border mode is UNDEFINED, and a border of filter_size / 2
+    const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), BorderSize(filter_size / 2));
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding: expected values come from a PaddingCalculator set up with border size 1 and the tested border mode
+    PaddingCalculator calculator(shape.x(), 8);
+    calculator.set_border_size(1);
+    calculator.set_border_mode(border_mode);
+
+    const PaddingSize dst_padding = calculator.required_padding();
+
+    calculator.set_accessed_elements(16);
+    calculator.set_access_offset(-1);
+
+    const PaddingSize src_padding = calculator.required_padding();
+
+    validate(src.info()->padding(), src_padding);
+    validate(dst.info()->padding(), dst_padding);
+}
+
+template <typename T> // square-convolution validation fixture bound to CLConvolution3x3
+using CLConvolutionFixture = ConvolutionSquareValidationFixture<CLTensor, CLAccessor, CLConvolution3x3, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::U8)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 3 })))
+{
+    // Validate the target against the reference, passing the region computed by shape_to_valid_region()
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::U8)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 3 })))
+{
+    // Validate the target against the reference, passing the region computed by shape_to_valid_region()
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+TEST_SUITE_END() /* Custom Convolution 3x3 */
+
+TEST_SUITE(CustomConvolution5x5)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::U8)),
+                                                                           datasets::BorderModes()),
+                                                                   framework::dataset::make("filter_size", { 5 })),
+               shape, data_type, border_mode, filter_size)
+{
+    // Create source and destination tensors with the same shape and data type
+    CLTensor src = create_tensor<CLTensor>(shape, data_type);
+    CLTensor dst = create_tensor<CLTensor>(shape, data_type);
+
+    // Create conv matrix; zero-initialised because configure() receives it and must not be handed indeterminate values
+    int16_t conv[25] = {};
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    CLConvolution5x5 convolution;
+    convolution.configure(&src, &dst, conv, 0, border_mode);
+
+    // Validate valid region: expected value is built from the shape, whether the border mode is UNDEFINED, and a border of filter_size / 2
+    const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), BorderSize(filter_size / 2));
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding: expected values come from a PaddingCalculator set up with border size 2 and the tested border mode
+    PaddingCalculator calculator(shape.x(), 8);
+    calculator.set_border_size(2);
+    calculator.set_border_mode(border_mode);
+
+    const PaddingSize dst_padding = calculator.required_padding();
+
+    calculator.set_accessed_elements(16);
+    calculator.set_access_offset(-2);
+
+    const PaddingSize src_padding = calculator.required_padding();
+
+    validate(src.info()->padding(), src_padding);
+    validate(dst.info()->padding(), dst_padding);
+}
+
+template <typename T> // square-convolution validation fixture bound to CLConvolution5x5
+using CLConvolutionFixture = ConvolutionSquareValidationFixture<CLTensor, CLAccessor, CLConvolution5x5, T>;
+FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::U8)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 5 })))
+{
+    // Validate the target against the reference, passing the region computed by shape_to_valid_region()
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::U8)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 5 })))
+{
+    // Validate the target against the reference, passing the region computed by shape_to_valid_region()
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+TEST_SUITE_END() /* Custom Convolution 5x5 */
+
+TEST_SUITE(CustomConvolution7x7)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::U8)),
+                                                                           datasets::BorderModes()),
+                                                                   framework::dataset::make("filter_size", { 7 })),
+               shape, data_type, border_mode, filter_size)
+{
+    // Create source and destination tensors with the same shape and data type
+    CLTensor src = create_tensor<CLTensor>(shape, data_type);
+    CLTensor dst = create_tensor<CLTensor>(shape, data_type);
+
+    // Create conv matrix; zero-initialised because configure() receives it and must not be handed indeterminate values
+    int16_t conv[49] = {};
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    CLConvolution7x7 convolution;
+    convolution.configure(&src, &dst, conv, 0, border_mode);
+
+    // Validate valid region: expected value is built from the shape, whether the border mode is UNDEFINED, and a border of filter_size / 2
+    const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), BorderSize(filter_size / 2));
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding: expected values come from a PaddingCalculator set up with border size 3 and the tested border mode
+    PaddingCalculator calculator(shape.x(), 8);
+    calculator.set_border_size(3);
+    calculator.set_border_mode(border_mode);
+
+    const PaddingSize dst_padding = calculator.required_padding();
+
+    calculator.set_accessed_elements(16);
+    calculator.set_access_offset(-3);
+
+    const PaddingSize src_padding = calculator.required_padding();
+
+    validate(src.info()->padding(), src_padding);
+    validate(dst.info()->padding(), dst_padding);
+}
+
+template <typename T> // square-convolution validation fixture bound to CLConvolution7x7
+using CLConvolutionFixture = ConvolutionSquareValidationFixture<CLTensor, CLAccessor, CLConvolution7x7, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::U8)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 7 })))
+{
+    // Validate the target against the reference, passing the region computed by shape_to_valid_region()
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::U8)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 7 })))
+{
+    // Validate the target against the reference, passing the region computed by shape_to_valid_region()
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+TEST_SUITE_END() /* Custom Convolution 7x7 */
+
+TEST_SUITE(CustomConvolution9x9)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::U8)),
+                                                                           datasets::BorderModes()),
+                                                                   framework::dataset::make("filter_size", { 9 })),
+               shape, data_type, border_mode, filter_size)
+{
+    // Create source and destination tensors with the same shape and data type
+    CLTensor src = create_tensor<CLTensor>(shape, data_type);
+    CLTensor dst = create_tensor<CLTensor>(shape, data_type);
+
+    // Create conv matrix; zero-initialised because configure() receives it and must not be handed indeterminate values
+    int16_t conv[81] = {};
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    CLConvolution9x9 convolution;
+    convolution.configure(&src, &dst, conv, 0, border_mode);
+
+    // Validate valid region: expected value is built from the shape, whether the border mode is UNDEFINED, and a border of filter_size / 2
+    const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), BorderSize(filter_size / 2));
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding: expected values come from a PaddingCalculator set up with border size 4 and the tested border mode
+    PaddingCalculator calculator(shape.x(), 8);
+    calculator.set_border_size(4);
+    calculator.set_border_mode(border_mode);
+
+    const PaddingSize dst_padding = calculator.required_padding();
+
+    calculator.set_accessed_elements(16);
+    calculator.set_access_offset(-4);
+
+    const PaddingSize src_padding = calculator.required_padding();
+
+    validate(src.info()->padding(), src_padding);
+    validate(dst.info()->padding(), dst_padding);
+}
+
+template <typename T> // square-convolution validation fixture bound to CLConvolution9x9
+using CLConvolutionFixture = ConvolutionSquareValidationFixture<CLTensor, CLAccessor, CLConvolution9x9, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::U8)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 9 })))
+{
+    // Validate the target against the reference, passing the region computed by shape_to_valid_region()
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::U8)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 9 })))
+{
+    // Validate the target against the reference, passing the region computed by shape_to_valid_region()
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+TEST_SUITE_END() /* Custom Convolution 9x9 */
+TEST_SUITE_END() /* Custom Convolution Square */
+
+TEST_SUITE(CustomConvolutionRectangle)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType",
+                                                                                           DataType::U8)),
+                                                                                   datasets::BorderModes()),
+                                                                           framework::dataset::make("filter_width", { 3, 5, 7, 9 })),
+                                                                   framework::dataset::make("filter_height", { 3, 5, 7, 9 })),
+               shape, data_type, border_mode, filter_width, filter_height)
+{
+    // Create source and destination tensors with the same shape and data type
+    CLTensor src = create_tensor<CLTensor>(shape, data_type);
+    CLTensor dst = create_tensor<CLTensor>(shape, data_type);
+
+    // Create conv matrix; a value-initialised std::vector replaces the non-standard, uninitialised variable-length array
+    std::vector<int16_t> conv(filter_width * filter_height);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    CLConvolutionRectangle convolution;
+    convolution.configure(&src, &dst, conv.data(), filter_width, filter_height, 1, border_mode);
+
+    // Validate valid region: expected value is built from the shape, whether the border mode is UNDEFINED, and a (height / 2, width / 2) border
+    const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), BorderSize(filter_height / 2, filter_width / 2));
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding: the calculator is first set up from the filter width, then re-used with the filter height
+    PaddingCalculator calculator(shape.x(), 8);
+    calculator.set_border_size(filter_width / 2);
+    calculator.set_border_mode(border_mode);
+
+    const PaddingSize dst_padding = calculator.required_padding();
+
+    calculator.set_accessed_elements(16);
+    calculator.set_access_offset(-(filter_width / 2));
+
+    const PaddingSize width_padding = calculator.required_padding();
+
+    calculator.set_border_size(filter_height / 2);
+    calculator.set_access_offset(-(filter_height / 2));
+    const PaddingSize height_padding = calculator.required_padding();
+
+    validate(src.info()->padding(), width_padding, height_padding);
+    validate(dst.info()->padding(), dst_padding);
+}
+
+template <typename T> // rectangle-convolution validation fixture bound to CLConvolutionRectangle
+using CLConvolutionFixture = ConvolutionRectangleValidationFixture<CLTensor, CLAccessor, CLConvolutionRectangle, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::U8)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                                   framework::dataset::make("filter_width", { 3, 5, 7, 9 })),
+                                                                                                           framework::dataset::make("filter_height", { 3, 5, 7, 9 })))
+{
+    // Validate the target against the reference, passing the region computed by shape_to_valid_region()
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::U8)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                                 framework::dataset::make("filter_width", { 3, 5, 7, 9 })),
+                                                                                                         framework::dataset::make("filter_height", { 3, 5, 7, 9 })))
+{
+    // Validate the target against the reference, passing the region computed by shape_to_valid_region()
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
+}
+TEST_SUITE_END() /* Custom Convolution Rectangle */
+
+TEST_SUITE(CustomConvolutionSeparable)
+TEST_SUITE(CustomConvolutionSeparable5x5)
+template <typename T> // separable-convolution validation fixture bound to CLConvolution5x5
+using CLConvolutionFixture = ConvolutionSeparableValidationFixture<CLTensor, CLAccessor, CLConvolution5x5, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::U8)),
+                                       datasets::BorderModes()),
+                               framework::dataset::make("filter_size", { 5 })))
+{
+    const auto expected_region = shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)); // region handed to validate()
+    validate(CLAccessor(_target), _reference, expected_region);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::U8)),
+                                       datasets::BorderModes()),
+                               framework::dataset::make("filter_size", { 5 })))
+{
+    const auto expected_region = shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)); // region handed to validate()
+    validate(CLAccessor(_target), _reference, expected_region);
+}
+TEST_SUITE_END() /* Custom Convolution Separable 5x5 */
+
+TEST_SUITE(CustomConvolutionSeparablex7x7) // NOTE(review): the 'x' before "7x7" looks like a typo; kept as-is since renaming changes the suite identifier
+template <typename T> // separable-convolution validation fixture bound to CLConvolution7x7
+using CLConvolutionFixture = ConvolutionSeparableValidationFixture<CLTensor, CLAccessor, CLConvolution7x7, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::U8)),
+                                       datasets::BorderModes()),
+                               framework::dataset::make("filter_size", { 7 })))
+{
+    const auto expected_region = shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)); // region handed to validate()
+    validate(CLAccessor(_target), _reference, expected_region);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::U8)),
+                                       datasets::BorderModes()),
+                               framework::dataset::make("filter_size", { 7 })))
+{
+    const auto expected_region = shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)); // region handed to validate()
+    validate(CLAccessor(_target), _reference, expected_region);
+}
+TEST_SUITE_END() /* Custom Convolution Separable 7x7 */
+
+TEST_SUITE(CustomConvolutionSeparable9x9)
+template <typename T> // separable-convolution validation fixture bound to CLConvolution9x9
+using CLConvolutionFixture = ConvolutionSeparableValidationFixture<CLTensor, CLAccessor, CLConvolution9x9, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::U8)),
+                                       datasets::BorderModes()),
+                               framework::dataset::make("filter_size", { 9 })))
+{
+    const auto expected_region = shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)); // region handed to validate()
+    validate(CLAccessor(_target), _reference, expected_region);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::U8)),
+                                       datasets::BorderModes()),
+                               framework::dataset::make("filter_size", { 9 })))
+{
+    const auto expected_region = shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)); // region handed to validate()
+    validate(CLAccessor(_target), _reference, expected_region);
+}
+TEST_SUITE_END() /* Custom Convolution Separable 9x9 */
+
+TEST_SUITE_END() /* Custom Convolution Separable */
+TEST_SUITE_END() /* Custom Convolution */
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/ConvolutionLayer.cpp b/tests/validation/CL/ConvolutionLayer.cpp
index 035e492..42d2e9f 100644
--- a/tests/validation/CL/ConvolutionLayer.cpp
+++ b/tests/validation/CL/ConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -111,7 +111,7 @@
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallConvolutionLayerDataset(),
-                                                                                                                     framework::dataset::make("ReshapeWeights", { true, false })),
+                                                                                                                     framework::dataset::make("ReshapeWeights", { true })),
                                                                                                              framework::dataset::make("DataType",
                                                                                                                      DataType::F16)))
 {
@@ -119,7 +119,7 @@
     validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeConvolutionLayerDataset(),
-                                                                                                                   framework::dataset::make("ReshapeWeights", { true, false })),
+                                                                                                                   framework::dataset::make("ReshapeWeights", { true })),
                                                                                                            framework::dataset::make("DataType",
                                                                                                                    DataType::F16)))
 {
@@ -130,7 +130,7 @@
 
 TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallConvolutionLayerDataset(),
-                                                                                                                      framework::dataset::make("ReshapeWeights", { true, false })),
+                                                                                                                      framework::dataset::make("ReshapeWeights", { true })),
                                                                                                               framework::dataset::make("DataType",
                                                                                                                       DataType::F32)))
 {
@@ -138,7 +138,7 @@
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeConvolutionLayerDataset(),
-                                                                                                                    framework::dataset::make("ReshapeWeights", { true, false })),
+                                                                                                                    framework::dataset::make("ReshapeWeights", { true })),
                                                                                                             framework::dataset::make("DataType",
                                                                                                                     DataType::F32)))
 {
@@ -155,7 +155,7 @@
 TEST_SUITE(QS8)
 // We test for fixed point precision [4,6]
 FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionLayerFixedPointFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
-                       framework::dataset::make("ReshapeWeights", { true, false })),
+                       framework::dataset::make("ReshapeWeights", { true })),
                        framework::dataset::make("DataType",
                                                 DataType::QS8)),
                        framework::dataset::make("FractionalBits", 4, 7)))
@@ -164,7 +164,7 @@
     validate(CLAccessor(_target), _reference, tolerance_fixed);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionLayerFixedPointFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeConvolutionLayerDataset(),
-                                                                                                                       framework::dataset::make("ReshapeWeights", { true, false })),
+                                                                                                                       framework::dataset::make("ReshapeWeights", { true })),
                                                                                                                        framework::dataset::make("DataType",
                                                                                                                                DataType::QS8)),
                                                                                                                        framework::dataset::make("FractionalBits", 4, 7)))
@@ -177,7 +177,7 @@
 TEST_SUITE(QS16)
 // Testing for fixed point position [1,14)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLConvolutionLayerFixedPointFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
-                       framework::dataset::make("ReshapeWeights", { true, false })),
+                       framework::dataset::make("ReshapeWeights", { true })),
                        framework::dataset::make("DataType",
                                                 DataType::QS16)),
                        framework::dataset::make("FractionalBits", 1, 14)))
@@ -186,7 +186,7 @@
     validate(CLAccessor(_target), _reference, tolerance_fixed);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionLayerFixedPointFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeConvolutionLayerDataset(),
-                                                                                                                        framework::dataset::make("ReshapeWeights", { true, false })),
+                                                                                                                        framework::dataset::make("ReshapeWeights", { true })),
                                                                                                                         framework::dataset::make("DataType",
                                                                                                                                 DataType::QS16)),
                                                                                                                         framework::dataset::make("FractionalBits", 1, 14)))
diff --git a/tests/validation/CL/DeconvolutionLayer.cpp b/tests/validation/CL/DeconvolutionLayer.cpp
new file mode 100644
index 0000000..59e8553
--- /dev/null
+++ b/tests/validation/CL/DeconvolutionLayer.cpp
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DeconvolutionLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Absolute tolerance applied when comparing the F32 target output against the reference */
+// Parameter space for the 3x3 fixture: small shapes x StrideX x StrideY x PadX x PadY x ax x ay x NumKernels (ax/ay are fixed to 0).
+const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 2)
+                     * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("ax", 0) * framework::dataset::make("ay", 0) * framework::dataset::make("NumKernels", { 1, 3 });
+// Parameter space for the 1x1 fixture: same layout as data3x3, but the PadX/PadY datasets are built from (0, 1) instead of (0, 2).
+const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 1)
+                     * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("ax", 0) * framework::dataset::make("ay", 0) * framework::dataset::make("NumKernels", { 1, 3 });
+
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(DeconvolutionLayer)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, (combine(datasets::SmallDeconvolutionShapes(), framework::dataset::make("DataType", DataType::F32))),
+               input_shape, data_type)
+{
+    // Derive the weights, bias and output shapes for a single 3x3 kernel applied to input_shape
+    const unsigned int kernel_size_x = 3;
+    const unsigned int kernel_size_y = 3;
+    const unsigned int num_kernels   = 1;
+    const TensorShape  weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels);
+    const TensorShape  bias_shape(num_kernels);
+    auto               out_dim      = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, 1, 1, 0, 0, 1, 1); // NOTE(review): the literals presumably mirror the PadStrideInfo(1, 1, 1, 1) and the 0, 0 passed to configure() below - confirm against the helper's signature
+    TensorShape        output_shape = deconvolution_output_shape(out_dim, input_shape, weights_shape);
+
+    // Create tensors; none of them is allocated in this test
+    CLTensor src     = create_tensor<CLTensor>(input_shape, data_type, 1);
+    CLTensor weights = create_tensor<CLTensor>(weights_shape, data_type, 1);
+    CLTensor bias    = create_tensor<CLTensor>(bias_shape, data_type, 1);
+    CLTensor dst     = create_tensor<CLTensor>(output_shape, data_type, 1);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function (the function is only configured, never run)
+    CLDeconvolutionLayer deconv;
+    deconv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), 0, 0);
+
+    // After configure(), each tensor's valid region is expected to equal shape_to_valid_region() of its own shape
+    const ValidRegion src_valid_region     = shape_to_valid_region(input_shape);
+    const ValidRegion weights_valid_region = shape_to_valid_region(weights_shape);
+    const ValidRegion bias_valid_region    = shape_to_valid_region(bias_shape);
+    const ValidRegion dst_valid_region     = shape_to_valid_region(output_shape);
+
+    validate(src.info()->valid_region(), src_valid_region);
+    validate(weights.info()->valid_region(), weights_valid_region);
+    validate(bias.info()->valid_region(), bias_valid_region);
+    validate(dst.info()->valid_region(), dst_valid_region);
+}
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip( // The datasets are zipped: the i-th entry of every list below forms test case i
+    framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),   // Mismatching data type
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),   // Invalid weights shape
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 4),   // Non supported data type
+                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 11),  // Invalid bias shape
+                                            TensorInfo(TensorShape(13U, 11U, 4U, 3U), 1, DataType::F32, 0), // Window shrink
+                                            TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32, 0), // Valid configuration (the only entry expected to pass)
+                                          }),
+    framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16, 0),
+                                            TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::QS8, 5),
+                                            TensorInfo(TensorShape(3U, 2U, 2U, 2U), 1, DataType::F32, 11),
+                                            TensorInfo(TensorShape(3U, 3U, 4U), 1, DataType::F32, 0),
+                                              TensorInfo(TensorShape(1U, 1U, 2U, 4U), 1, DataType::F32, 0),
+                                          })),
+    framework::dataset::make("BiasInfo",  { TensorInfo(TensorShape(1U), 1, DataType::F16, 0),
+                                            TensorInfo(TensorShape(1U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(1U), 1, DataType::F32, 5),
+                                            TensorInfo(TensorShape(25U, 11U), 1, DataType::F32, 11),
+                                            TensorInfo(TensorShape(1U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(4U), 1, DataType::F32, 0),
+                                          })),
+    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16, 0),
+                                            TensorInfo(TensorShape(25U, 10U, 2U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 5),
+                                            TensorInfo(TensorShape(13U, 13U, 2U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(11U, 9U, 1U, 3U), 1, DataType::F32, 0),
+                                            TensorInfo(TensorShape(32U, 16U, 4U), 1, DataType::F32, 0),
+                                          })),
+    framework::dataset::make("PadStrideInfo", { PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 1, 1),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                           })),
+    framework::dataset::make("ax",          {   1U,
+                                                1U,
+                                                1U,
+                                                1U,
+                                                0U,
+                                                0U,
+                                            })),
+   framework::dataset::make("ay",           {   1U,
+                                                1U,
+                                                1U,
+                                                1U,
+                                                0U,
+                                                0U,
+                                            })),
+    framework::dataset::make("Expected", { false, false, false, false, false, true })), // One expectation per entry above: only the last configuration must validate
+    input_info, weights_info, bias_info, output_info, pad_info, ax, ay, expected)
+{
+    bool is_valid = bool(CLDeconvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pad_info, ax, ay)); // validate() is called on non-resizable clones of the dataset infos
+    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+template <typename T> // Fixture alias used by the W3x3 suite; NOTE(review): the trailing "3, 3" is presumably the kernel width/height - confirm against DeconvolutionLayerFixture.h
+using CLDeconvolutionLayerFixture3x3 = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 3, 3>;
+
+template <typename T> // Fixture alias used by the W1x1 suite; same template as above with "1, 1" as the trailing arguments
+using CLDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 1, 1>;
+
+TEST_SUITE(Float)
+
+TEST_SUITE(FP32)
+TEST_SUITE(W3x3)
+
+FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(data3x3, framework::dataset::make("DataType", DataType::F32)))
+{
+    // Compare the CL target against the reference using the absolute F32 tolerance
+    validate(CLAccessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END() // W3x3
+
+TEST_SUITE(W1x1)
+FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1<float>, framework::DatasetMode::ALL, combine(data1x1, framework::dataset::make("DataType", DataType::F32)))
+{
+    // Compare the CL target against the reference using the absolute F32 tolerance
+    validate(CLAccessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END() // W1x1
+
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
+
+TEST_SUITE_END() // DeconvolutionLayer
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
index 92a2773..43e04fb 100644
--- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,7 +43,7 @@
 namespace
 {
 constexpr RelativeTolerance<float>   tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
-constexpr RelativeTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */
 } // namespace
 
 TEST_SUITE(CL)
@@ -96,13 +96,13 @@
 TEST_SUITE(W3x3)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
                        framework::dataset::make("DataType", DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127) })))
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
                        framework::dataset::make("DataType", DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127) })))
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
diff --git a/tests/validation/CL/DepthwiseSeparableConvolutionLayer.cpp b/tests/validation/CL/DepthwiseSeparableConvolutionLayer.cpp
index 4d9f6b8..43968f7 100644
--- a/tests/validation/CL/DepthwiseSeparableConvolutionLayer.cpp
+++ b/tests/validation/CL/DepthwiseSeparableConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,7 +27,7 @@
 #include "arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/PaddingCalculator.h"
-#include "tests/datasets/MobileNetDepthwiseSeparableConvolutionLayerDataset.h"
+#include "tests/datasets/system_tests/mobilenet/MobileNetDepthwiseSeparableConvolutionLayerDataset.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
diff --git a/tests/validation/CL/Derivative.cpp b/tests/validation/CL/Derivative.cpp
new file mode 100644
index 0000000..49666d6
--- /dev/null
+++ b/tests/validation/CL/Derivative.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CL/functions/CLDerivative.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/GradientDimensionDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DerivativeFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(Derivative)
+
+using CLDerivativeFixture = DerivativeValidationFixture<CLTensor, CLAccessor, CLDerivative, uint8_t, int16_t>; // NOTE(review): uint8_t / int16_t presumably are the input / output element types (the tests use a U8 source and S16 gradients) - confirm against DerivativeFixture.h
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                   Format::U8)),
+               shape, border_mode, format)
+{
+    // Generate a random constant border value in [0, 255], seeded from the assets library
+    std::mt19937                            gen(library->seed());
+    std::uniform_int_distribution<uint16_t> int_dist(0, 255); // uint8_t is not an allowed IntType for std::uniform_int_distribution (undefined behaviour), so draw as uint16_t
+    const uint8_t                           constant_border_value = static_cast<uint8_t>(int_dist(gen));
+
+    // Create tensors: one source in the dataset format and two S16 gradient outputs
+    CLTensor src   = create_tensor<CLTensor>(shape, data_type_from_format(format));
+    CLTensor dst_x = create_tensor<CLTensor>(shape, DataType::S16);
+    CLTensor dst_y = create_tensor<CLTensor>(shape, DataType::S16);
+
+    src.info()->set_format(format);
+    dst_x.info()->set_format(Format::S16);
+    dst_y.info()->set_format(Format::S16);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst_x.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst_y.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure the Derivative function (it is only configured, never run)
+    CLDerivative derivative;
+    derivative.configure(&src, &dst_x, &dst_y, border_mode, constant_border_value);
+
+    // Both outputs must report the region computed by shape_to_valid_region() for a border size of 1
+    constexpr BorderSize border_size{ 1 };
+    const ValidRegion    dst_valid_region = shape_to_valid_region(shape, border_mode == BorderMode::UNDEFINED, border_size);
+
+    validate(dst_x.info()->valid_region(), dst_valid_region);
+    validate(dst_y.info()->valid_region(), dst_valid_region);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDerivativeFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                         Format::U8)),
+                                                                                                 datasets::GradientDimensions()))
+{
+    // Validate both gradients over the region returned by shape_to_valid_region() for a border size of 1
+    const bool  border_undefined = (_border_mode == BorderMode::UNDEFINED);
+    ValidRegion region_x         = shape_to_valid_region(_reference.first.shape(), border_undefined, BorderSize(1));
+    ValidRegion region_y         = shape_to_valid_region(_reference.second.shape(), border_undefined, BorderSize(1));
+    validate(CLAccessor(_target.first), _reference.first, region_x);
+    validate(CLAccessor(_target.second), _reference.second, region_y);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDerivativeFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                       Format::U8)),
+                                                                                               datasets::GradientDimensions()))
+{
+    // Validate both gradients over the region returned by shape_to_valid_region() for a border size of 1
+    const bool  border_undefined = (_border_mode == BorderMode::UNDEFINED);
+    ValidRegion region_x         = shape_to_valid_region(_reference.first.shape(), border_undefined, BorderSize(1));
+    ValidRegion region_y         = shape_to_valid_region(_reference.second.shape(), border_undefined, BorderSize(1));
+    validate(CLAccessor(_target.first), _reference.first, region_x);
+    validate(CLAccessor(_target.second), _reference.second, region_y);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/FullyConnectedLayer.cpp b/tests/validation/CL/FullyConnectedLayer.cpp
index 0d8c877..aba92f1 100644
--- a/tests/validation/CL/FullyConnectedLayer.cpp
+++ b/tests/validation/CL/FullyConnectedLayer.cpp
@@ -115,6 +115,57 @@
     ARM_COMPUTE_EXPECT(weights.info()->quantization_info() == weights_quantization_info, framework::LogLevel::ERRORS);
 }
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip( // The datasets are zipped: the i-th entry of every list below forms test case i
+    framework::dataset::make("InputInfo", { TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32),    // Mismatching data types
+                                            TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::QS8, 2), // Mismatching fixed point position
+                                            TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32),    // Valid configuration, TransposeWeights = true
+                                            TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32),    // Valid configuration, TransposeWeights = false
+                                            TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32),    // Invalid weights dimensions
+                                            TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32),    // Wrongly reshaped weights
+                                            TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32),    // Valid configuration (same values as the third entry)
+                                          }),
+    framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(315U, 271U), 1, DataType::F16),
+                                             TensorInfo(TensorShape(315U, 271U), 1, DataType::QS8, 3),
+                                             TensorInfo(TensorShape(192U, 192U), 1, DataType::F32),
+                                             TensorInfo(TensorShape(192U, 192U), 1, DataType::F32),
+                                             TensorInfo(TensorShape(217U, 315U), 1, DataType::F32),
+                                             TensorInfo(TensorShape(217U, 315U), 1, DataType::F32),
+                                             TensorInfo(TensorShape(192U, 192U), 1, DataType::F32),
+                                          })),
+    framework::dataset::make("BiasInfo",{ TensorInfo(TensorShape(271U), 1, DataType::F32),
+                                          TensorInfo(TensorShape(271U), 1, DataType::QS8, 2),
+                                          TensorInfo(TensorShape(192U), 1, DataType::F32),
+                                          TensorInfo(TensorShape(192U), 1, DataType::F32),
+                                          TensorInfo(TensorShape(271U), 1, DataType::F32),
+                                          TensorInfo(TensorShape(271U), 1, DataType::F32),
+                                          TensorInfo(TensorShape(192U), 1, DataType::F32),
+                                          })),
+    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(271U, 3U), 1, DataType::QS8, 3),
+                                            TensorInfo(TensorShape(192U, 4U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(192U, 4U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(192U, 4U), 1, DataType::F32),
+                                           })),
+    framework::dataset::make("TransposeWeights",{ true, true, true, false, true, true, true })),
+    framework::dataset::make("ReshapedWeights",{ false, false, false, false, false, false , false})), // No case passes the reshaped-weights flag as true
+    framework::dataset::make("Expected", { false, false, true, true, false, false, true })), // One expectation per entry above
+    input_info, weights_info, bias_info, output_info, transpose_weights, reshaped_weights, expected)
+{
+    Status status = CLFullyConnectedLayer::validate(&input_info.clone()->set_is_resizable(false), // validate() is called on non-resizable clones of the dataset infos
+                                                    &weights_info.clone()->set_is_resizable(false),
+                                                    &bias_info.clone()->set_is_resizable(false),
+                                                    &output_info.clone()->set_is_resizable(false),
+                                                    transpose_weights,
+                                                    reshaped_weights);
+    ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
 template <typename T>
 using CLFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T, false>;
 
diff --git a/tests/validation/CL/GEMM.cpp b/tests/validation/CL/GEMM.cpp
index 878f9fc..4e7b24e 100644
--- a/tests/validation/CL/GEMM.cpp
+++ b/tests/validation/CL/GEMM.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,11 +21,14 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
+#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
 #include "arm_compute/runtime/CL/functions/CLGEMM.h"
 #include "tests/CL/CLAccessor.h"
+#include "tests/CL/Helper.h"
 #include "tests/PaddingCalculator.h"
 #include "tests/datasets/LargeGEMMDataset.h"
 #include "tests/datasets/SmallGEMMDataset.h"
@@ -34,6 +37,8 @@
 #include "tests/framework/datasets/Datasets.h"
 #include "tests/validation/Validation.h"
 #include "tests/validation/fixtures/GEMMFixture.h"
+#include "tests/validation/fixtures/GEMMInterleave4x4Fixture.h"
+#include "tests/validation/fixtures/GEMMTranspose1xWFixture.h"
 
 namespace arm_compute
 {
@@ -46,7 +51,8 @@
 RelativeTolerance<float>            tolerance_f32(0.001f);    /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
 RelativeTolerance<half_float::half> tolerance_f16(half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
 constexpr AbsoluteTolerance<float>  tolerance_q(1.0f);        /**< Tolerance value for comparing reference's output against implementation's output for fixed point data types */
-constexpr float                     tolerance_num = 0.02f;    /**< Tolerance number */
+constexpr float                     tolerance_num   = 0.02f;  /**< Tolerance number */
+const auto                          data_interleave = framework::dataset::make("M", 8, 14) * framework::dataset::make("N", 7, 14); /**< M x N combinations fed to the INTERLEAVE_4X4 tests */
 
 /** CNN data types */
 const auto CNNDataTypes = framework::dataset::make("DataType",
@@ -58,9 +64,50 @@
 });
 } // namespace
 
+const auto data_transpose = framework::dataset::make("M", 8, 14) * framework::dataset::make("N", 7, 14); /**< M x N combinations fed to the TRANSPOSE_1XW tests. NOTE(review): unlike data_interleave this is declared outside the anonymous namespace */
+
 TEST_SUITE(CL)
 TEST_SUITE(GEMM)
 
+TEST_SUITE(INTERLEAVE_4X4)
+using CLGEMMInterleave4x4 = CLSynthetizeFunctionWithZeroConstantBorder<CLGEMMInterleave4x4Kernel, 4>; // Wraps the bare kernel so the fixtures can drive it like a function; NOTE(review): meaning of the "4" argument is defined in tests/CL/Helper.h - confirm there
+
+TEST_SUITE(FP32)
+using CLGEMMInterleave4x4Fixture = GEMMInterleave4x4ValidationFixture<CLTensor, CLAccessor, CLGEMMInterleave4x4, float>;
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave * framework::dataset::make("DataType", DataType::F32))
+{
+    // Validate output (no tolerance is passed to validate() here)
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QS8)
+using CLGEMMInterleave4x4Fixture = GEMMInterleave4x4ValidationFixedPointFixture<CLTensor, CLAccessor, CLGEMMInterleave4x4, int8_t>;
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave *
+                       framework::dataset::make("DataType", DataType::QS8)
+                       * framework::dataset::make("FractionalBits", 1, 7))
+{
+    // Validate output (no tolerance is passed to validate() here)
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // QS8
+
+TEST_SUITE(QS16)
+using CLGEMMInterleave4x4Fixture = GEMMInterleave4x4ValidationFixedPointFixture<CLTensor, CLAccessor, CLGEMMInterleave4x4, int16_t>;
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMInterleave4x4Fixture, framework::DatasetMode::PRECOMMIT, data_interleave *
+                       framework::dataset::make("DataType", DataType::QS16)
+                       * framework::dataset::make("FractionalBits", 1, 14))
+{
+    // Validate output (no tolerance is passed to validate() here)
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // QS16
+
+TEST_SUITE_END() // Quantized
+
+TEST_SUITE_END() // INTERLEAVE_4X4
+
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallGEMMDataset(), datasets::LargeGEMMDataset()), CNNDataTypes),
                shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type)
 {
@@ -86,6 +133,46 @@
 template <typename T>
 using CLGEMMFixture = GEMMValidationFixture<CLTensor, CLAccessor, CLGEMM, T>;
 
+TEST_SUITE(TRANSPOSE_1XW)
+using CLGEMMTranspose1xW        = CLSynthetizeFunctionWithZeroConstantBorder<CLGEMMTranspose1xWKernel, 4>;
+using CLGEMMTranspose1xWFixture = GEMMTranspose1xWValidationFixture<CLTensor, CLAccessor, CLGEMMTranspose1xW, float>;
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * framework::dataset::make("DataType", DataType::F32))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QS8)
+using CLGEMMTranspose1xW        = CLSynthetizeFunctionWithZeroConstantBorder<CLGEMMTranspose1xWKernel, 16>;
+using CLGEMMTranspose1xWFixture = GEMMTranspose1xWValidationFixedPointFixture<CLTensor, CLAccessor, CLGEMMTranspose1xW, int8_t>;
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose *
+                       framework::dataset::make("DataType", DataType::QS8)
+                       * framework::dataset::make("FractionalBits", 1, 7))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // QS8
+
+TEST_SUITE(QS16)
+using CLGEMMTranspose1xW        = CLSynthetizeFunctionWithZeroConstantBorder<CLGEMMTranspose1xWKernel, 8>;
+using CLGEMMTranspose1xWFixture = GEMMTranspose1xWValidationFixedPointFixture<CLTensor, CLAccessor, CLGEMMTranspose1xW, int16_t>;
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose *
+                       framework::dataset::make("DataType", DataType::QS16)
+                       * framework::dataset::make("FractionalBits", 1, 14))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // QS16
+
+TEST_SUITE_END() // Quantized
+
+TEST_SUITE_END() // TRANSPOSE_1XW
+
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallGEMMDataset(), framework::dataset::make("DataType", DataType::F16)))
diff --git a/tests/validation/CL/HOGDescriptor.cpp b/tests/validation/CL/HOGDescriptor.cpp
new file mode 100644
index 0000000..aef265a
--- /dev/null
+++ b/tests/validation/CL/HOGDescriptor.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/CLHOG.h"
+#include "arm_compute/runtime/CL/functions/CLHOGDescriptor.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/HOGDescriptorDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/HOGDescriptorFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+AbsoluteTolerance<float> tolerance(1e-2f); /**< Absolute tolerance passed to validate() when comparing the target output against the reference */
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(HOGDescriptor)
+
+using CLHOGDescriptorFixture = HOGDescriptorValidationFixture<CLTensor, CLHOG, CLAccessor, CLHOGDescriptor, uint8_t, float>;
+
+// *INDENT-OFF*
+// clang-format off
+FIXTURE_DATA_TEST_CASE(RunSmall, CLHOGDescriptorFixture, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(
+                       datasets::SmallHOGDescriptorDataset(),
+                       framework::dataset::make("Format", Format::U8)),
+                       framework::dataset::make("BorderMode", {BorderMode::CONSTANT, BorderMode::REPLICATE})))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLHOGDescriptorFixture, framework::DatasetMode::NIGHTLY,
+                       combine(combine(
+                       datasets::LargeHOGDescriptorDataset(),
+                       framework::dataset::make("Format", Format::U8)),
+                       framework::dataset::make("BorderMode", {BorderMode::CONSTANT, BorderMode::REPLICATE})))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance);
+}
+// clang-format on
+// *INDENT-ON*
+
+TEST_SUITE_END() // HOGDescriptor
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/HarrisCorners.cpp b/tests/validation/CL/HarrisCorners.cpp
index 4188cb5..00b6910 100644
--- a/tests/validation/CL/HarrisCorners.cpp
+++ b/tests/validation/CL/HarrisCorners.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,6 +30,7 @@
 #include "tests/CL/CLArrayAccessor.h"
 #include "tests/PaddingCalculator.h"
 #include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/ImageFileDatasets.h"
 #include "tests/datasets/ShapeDatasets.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
@@ -101,14 +102,15 @@
 template <typename T>
 using CLHarrisCornersFixture = HarrisCornersValidationFixture<CLTensor, CLAccessor, CLKeyPointArray, CLHarrisCorners, T>;
 
-FIXTURE_DATA_TEST_CASE(RunSmall, CLHarrisCornersFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::Small2DShapes(), data), framework::dataset::make("Format", Format::U8)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLHarrisCornersFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallImageFiles(), data), framework::dataset::make("Format",
+                                                                                                             Format::U8)))
 {
     // Validate output
     CLArrayAccessor<KeyPoint> array(_target);
     validate_keypoints(array.buffer(), array.buffer() + array.num_values(), _reference.begin(), _reference.end(), RelativeTolerance<float>(0.0001f));
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLHarrisCornersFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large2DShapes(), data), framework::dataset::make("Format", Format::U8)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLHarrisCornersFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeImageFiles(), data), framework::dataset::make("Format", Format::U8)))
 {
     // Validate output
     CLArrayAccessor<KeyPoint> array(_target);
diff --git a/tests/validation/CL/Permute.cpp b/tests/validation/CL/Permute.cpp
new file mode 100644
index 0000000..6c31ccc
--- /dev/null
+++ b/tests/validation/CL/Permute.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLPermute.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/PermuteFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+const auto PermuteParametersSmall = combine(datasets::Small4DShapes(),
+                                            framework::dataset::make("PermutationVector", { PermutationVector(2U, 0U, 1U), PermutationVector(1U, 2U, 0U), PermutationVector(3U, 2U, 0U, 1U) }));
+const auto PermuteParametersLarge = combine(datasets::Large4DShapes(),
+                                            framework::dataset::make("PermutationVector", { PermutationVector(2U, 0U, 1U), PermutationVector(1U, 2U, 0U), PermutationVector(3U, 2U, 0U, 1U) }));
+} // namespace
+TEST_SUITE(CL)
+TEST_SUITE(Permute)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::Small4DShapes(), framework::dataset::make("DataType", { DataType::S8, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, DataType::F32 })),
+               shape, data_type)
+{
+    // Define permutation vector
+    const PermutationVector perm(2U, 0U, 1U);
+
+    // Permute shapes
+    TensorShape output_shape = shape;
+    permute(output_shape, perm);
+
+    // Create tensors
+    CLTensor ref_src = create_tensor<CLTensor>(shape, data_type);
+    CLTensor dst     = create_tensor<CLTensor>(output_shape, data_type);
+
+    // Create and Configure function
+    CLPermute perm_func;
+    perm_func.configure(&ref_src, &dst, perm);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(output_shape);
+    validate(dst.info()->valid_region(), valid_region);
+}
+
+template <typename T>
+using CLPermuteFixture = PermuteValidationFixture<CLTensor, CLAccessor, CLPermute, T>;
+
+TEST_SUITE(U8)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPermuteFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(PermuteParametersSmall, framework::dataset::make("DataType", DataType::U8)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPermuteFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(PermuteParametersLarge, framework::dataset::make("DataType", DataType::U8)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // U8
+
+TEST_SUITE(U16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPermuteFixture<uint16_t>, framework::DatasetMode::PRECOMMIT, combine(PermuteParametersSmall, framework::dataset::make("DataType", DataType::U16)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPermuteFixture<uint16_t>, framework::DatasetMode::NIGHTLY, combine(PermuteParametersLarge, framework::dataset::make("DataType", DataType::U16)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // U16
+
+TEST_SUITE(U32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPermuteFixture<uint32_t>, framework::DatasetMode::PRECOMMIT, combine(PermuteParametersSmall, framework::dataset::make("DataType", DataType::U32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPermuteFixture<uint32_t>, framework::DatasetMode::NIGHTLY, combine(PermuteParametersLarge, framework::dataset::make("DataType", DataType::U32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // U32
+
+TEST_SUITE_END() // Permute
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/PoolingLayer.cpp b/tests/validation/CL/PoolingLayer.cpp
index ee63937..4e5e5aa 100644
--- a/tests/validation/CL/PoolingLayer.cpp
+++ b/tests/validation/CL/PoolingLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,6 +43,12 @@
 {
 namespace
 {
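+/** Failing data set: a single AVG pooling case whose pooling size (100) exceeds the 60x52 input plane, with padding excluded */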
+const auto PoolingLayerDatasetSpecial = ((((framework::dataset::make("Shape", TensorShape{ 60U, 52U, 3U, 5U })
+                                            * framework::dataset::make("PoolType", PoolingType::AVG))
+                                           * framework::dataset::make("PoolingSize", 100))
+                                          * framework::dataset::make("PadStride", PadStrideInfo(5, 5, 50, 50)))
+                                         * framework::dataset::make("ExcludePadding", true));
 /** Input data set for floating-point data types */
 const auto PoolingLayerDatasetFP = combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { 2, 3, 4, 7, 9 })),
                                                    framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })),
@@ -74,7 +80,7 @@
                framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Mismatching data type
                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Window shrink
                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 4),     // Mismatching fixed point position
-                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS16, 11),   // Window shrink
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS16, 11),
                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Invalid pad/size combination
                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Invalid pad/size combination
                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8, 0), // Invalid parameters
@@ -104,7 +110,7 @@
                                                        PoolingLayerInfo(PoolingType::MAX),
                                                        PoolingLayerInfo(PoolingType::AVG),
                                                       })),
-               framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false, true })),
+               framework::dataset::make("Expected", { false, false, false, true, false, false, false, false, false, true })),
                input_info, output_info, pool_info, expected)
 {
     ARM_COMPUTE_EXPECT(bool(CLPoolingLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pool_info)) == expected, framework::LogLevel::ERRORS);
@@ -117,6 +123,11 @@
 
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSpecial, CLPoolingLayerFixture<float>, framework::DatasetMode::ALL, PoolingLayerDatasetSpecial * framework::dataset::make("DataType", DataType::F32))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
 FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFP, framework::dataset::make("DataType",
                                                                                                     DataType::F32))))
 {
diff --git a/tests/validation/CPP/Permute.cpp b/tests/validation/CPP/Permute.cpp
index 7f27f3c..3341da3 100644
--- a/tests/validation/CPP/Permute.cpp
+++ b/tests/validation/CPP/Permute.cpp
@@ -42,10 +42,10 @@
 {
 namespace
 {
-const auto PermuteParametersSmall = combine(concat(datasets::Small3DShapes(), datasets::Small4DShapes()),
-                                            framework::dataset::make("PermutationVector", { PermutationVector(2U, 0U, 1U), PermutationVector(1U, 2U, 0U) }));
-const auto PermuteParametersLarge = combine(concat(datasets::Large3DShapes(), datasets::Large4DShapes()),
-                                            framework::dataset::make("PermutationVector", { PermutationVector(2U, 0U, 1U), PermutationVector(1U, 2U, 0U) }));
+const auto PermuteParametersSmall = combine(datasets::Small4DShapes(),
+                                            framework::dataset::make("PermutationVector", { PermutationVector(2U, 0U, 1U), PermutationVector(1U, 2U, 0U), PermutationVector(3U, 2U, 0U, 1U) }));
+const auto PermuteParametersLarge = combine(datasets::Large4DShapes(),
+                                            framework::dataset::make("PermutationVector", { PermutationVector(2U, 0U, 1U), PermutationVector(1U, 2U, 0U), PermutationVector(3U, 2U, 0U, 1U) }));
 } // namespace
 TEST_SUITE(CPP)
 TEST_SUITE(Permute)
diff --git a/tests/validation/GLES_COMPUTE/ArithmeticAddition.cpp b/tests/validation/GLES_COMPUTE/ArithmeticAddition.cpp
new file mode 100755
index 0000000..b16503e
--- /dev/null
+++ b/tests/validation/GLES_COMPUTE/ArithmeticAddition.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCArithmeticAddition.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ConvertPolicyDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/ArithmeticAdditionFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Input data sets **/
+const auto ArithmeticAdditionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
+                                                   framework::dataset::make("DataType", DataType::F16));
+
+} // namespace
+
+TEST_SUITE(GC)
+TEST_SUITE(ArithmeticAddition)
+template <typename T>
+using GCArithmeticAdditionFixture = ArithmeticAdditionValidationFixture<GCTensor, GCAccessor, GCArithmeticAddition, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, GCArithmeticAdditionFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ArithmeticAdditionFP16Dataset),
+                                                                                                         framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference);
+}
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // Float
+
+TEST_SUITE_END() // ArithmeticAddition
+TEST_SUITE_END() // GC
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/GLES_COMPUTE/ConvolutionLayer.cpp b/tests/validation/GLES_COMPUTE/ConvolutionLayer.cpp
new file mode 100644
index 0000000..a5d1b69
--- /dev/null
+++ b/tests/validation/GLES_COMPUTE/ConvolutionLayer.cpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/LargeConvolutionLayerDataset.h"
+#include "tests/datasets/SmallConvolutionLayerDataset.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/ConvolutionLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
+constexpr float                     tolerance_num = 0.07f;                /**< Tolerance number */
+
+/** CNN data types */
+const auto CNNDataTypes = framework::dataset::make("DataType",
+{
+    DataType::F16,
+    // DataType::F32,
+});
+} // namespace
+
+TEST_SUITE(GC)
+TEST_SUITE(ConvolutionLayer)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallConvolutionLayerDataset(), datasets::LargeConvolutionLayerDataset()), CNNDataTypes),
+               input_shape, weights_shape, bias_shape, output_shape, info, data_type)
+{
+    // Use a non-zero fixed point position only for fixed point data types
+    int fixed_point_position = is_data_type_fixed_point(data_type) ? 3 : 0;
+
+    auto bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
+
+    // Create tensors
+    GCTensor src     = create_tensor<GCTensor>(input_shape, data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127));
+    GCTensor weights = create_tensor<GCTensor>(weights_shape, data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127));
+    GCTensor bias    = create_tensor<GCTensor>(bias_shape, bias_data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127));
+    GCTensor dst     = create_tensor<GCTensor>(output_shape, data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127));
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    const QuantizationInfo src_quantization_info     = src.info()->quantization_info();
+    const QuantizationInfo weights_quantization_info = weights.info()->quantization_info();
+
+    // Create and configure function
+    GCConvolutionLayer conv;
+    conv.configure(&src, &weights, &bias, &dst, info);
+
+    // Validate valid region
+    const ValidRegion src_valid_region     = shape_to_valid_region(input_shape);
+    const ValidRegion weights_valid_region = shape_to_valid_region(weights_shape);
+    const ValidRegion bias_valid_region    = shape_to_valid_region(bias_shape);
+    const ValidRegion dst_valid_region     = shape_to_valid_region(output_shape);
+
+    validate(src.info()->valid_region(), src_valid_region);
+    validate(weights.info()->valid_region(), weights_valid_region);
+    validate(bias.info()->valid_region(), bias_valid_region);
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate QuantizationInfo (configure() must leave the input and weights quantization info untouched)
+    ARM_COMPUTE_EXPECT(src.info()->quantization_info() == src_quantization_info, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(weights.info()->quantization_info() == weights_quantization_info, framework::LogLevel::ERRORS);
+
+    // Validate padding
+    // TODO(COMPMID-415) Need to validate padding?
+}
+
+template <typename T>
+using GCConvolutionLayerFixture = ConvolutionValidationFixture<GCTensor, GCAccessor, GCConvolutionLayer, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, GCConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallConvolutionLayerDataset(),
+                                                                                                                     framework::dataset::make("ReshapeWeights", { true, false })),
+                                                                                                             framework::dataset::make("DataType",
+                                                                                                                     DataType::F16)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, GCConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeConvolutionLayerDataset(),
+                                                                                                                   framework::dataset::make("ReshapeWeights", { true, false })),
+                                                                                                           framework::dataset::make("DataType",
+                                                                                                                   DataType::F16)))
+{
+    // Validate output
+    validate(GCAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+TEST_SUITE_END() // FP16
+
+TEST_SUITE_END() // Float
+TEST_SUITE_END() // ConvolutionLayer
+TEST_SUITE_END() // GC
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp b/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp
new file mode 100644
index 0000000..cacf696
--- /dev/null
+++ b/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/DepthwiseConvolutionLayerDataset.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+RelativeTolerance<half> tolerance_fp16(half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
+constexpr float         tolerance_num = 0.07f;     /**< Tolerance number */
+} // namespace
+
+TEST_SUITE(GC)
+TEST_SUITE(DepthwiseConvolutionLayer)
+
+template <typename T>
+using GCDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture<GCTensor, GCAccessor, GCDepthwiseConvolutionLayer3x3, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP16)
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                                                                                                   framework::dataset::make("DataType",
+                                                                                                                           DataType::F16)))
+{
+    validate(GCAccessor(_target), _reference, tolerance_fp16, tolerance_num);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthwiseConvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+                                                                                                                       framework::dataset::make("DataType",
+                                                                                                                               DataType::F16)))
+{
+    validate(GCAccessor(_target), _reference, tolerance_fp16, tolerance_num);
+}
+TEST_SUITE_END() // W3x3
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // Float
+
+TEST_SUITE_END() // DepthwiseConvolutionLayer
+TEST_SUITE_END() // GC
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/GLES_COMPUTE/NormalizePlanarYUVLayer.cpp b/tests/validation/GLES_COMPUTE/NormalizePlanarYUVLayer.cpp
new file mode 100644
index 0000000..e06b19c
--- /dev/null
+++ b/tests/validation/GLES_COMPUTE/NormalizePlanarYUVLayer.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizePlanarYUVLayer.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/RandomNormalizePlanarYUVLayerDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr AbsoluteTolerance<float> tolerance_f16(0.5f); /**< Absolute tolerance for comparing reference's output against implementation's output for DataType::F16. NOTE(review): 0.5 looks wide if the inputs are drawn from [-1, 1] - confirm it is intentional */
+} // namespace
+
+TEST_SUITE(GC)
+TEST_SUITE(NormalizePlanarYUVLayer)
+
+template <typename T>
+using GCNormalizePlanarYUVLayerFixture = NormalizePlanarYUVLayerValidationFixture<GCTensor, GCAccessor, GCNormalizePlanarYUVLayer, T>;
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::RandomNormalizePlanarYUVLayerDataset(), framework::dataset::make("DataType", { DataType::F16 })),
+               shape0, shape1, dt)
+{
+    // Create tensors: src/dst share shape0, mean/sd share shape1
+    GCTensor src  = create_tensor<GCTensor>(shape0, dt, 1);
+    GCTensor dst  = create_tensor<GCTensor>(shape0, dt, 1);
+    GCTensor mean = create_tensor<GCTensor>(shape1, dt, 1);
+    GCTensor sd   = create_tensor<GCTensor>(shape1, dt, 1);
+
+    // Create the function and configure it; the function is never run in this test
+    GCNormalizePlanarYUVLayer norm;
+    norm.configure(&src, &dst, &mean, &sd);
+
+    // After configuration the destination's valid region must equal the region derived from shape0
+    const ValidRegion valid_region = shape_to_valid_region(shape0);
+    validate(dst.info()->valid_region(), valid_region);
+}
+
+TEST_SUITE(Float)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(Random, GCNormalizePlanarYUVLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomNormalizePlanarYUVLayerDataset(),
+                                                                                                                  framework::dataset::make("DataType", DataType::F16)))
+{
+    // Compare the GLES target with the reference using the F16 absolute tolerance; the trailing 0 is the tolerance number (NOTE(review): presumably the allowed mismatch ratio - confirm against validate())
+    validate(GCAccessor(_target), _reference, tolerance_f16, 0);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/GLES_COMPUTE/PoolingLayer.cpp b/tests/validation/GLES_COMPUTE/PoolingLayer.cpp
new file mode 100644
index 0000000..e789dba
--- /dev/null
+++ b/tests/validation/GLES_COMPUTE/PoolingLayer.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/PoolingTypesDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/PoolingLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Input data set for floating-point data types: datasets::PoolingTypes() combined with the pooling sizes, pad/stride settings and exclude-padding flags listed below */
+const auto PoolingLayerDatasetFP = combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { 2, 3, 4, 7, 9 })),
+                                                   framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })),
+                                           framework::dataset::make("ExcludePadding", { true, false }));
+
+constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Absolute tolerance for comparing reference's output against implementation's output for 32-bit floating-point type */
+constexpr AbsoluteTolerance<float> tolerance_f16(0.01f);  /**< Absolute tolerance for comparing reference's output against implementation's output for 16-bit floating-point type */
+} // namespace
+
+TEST_SUITE(GC)
+TEST_SUITE(PoolingLayer)
+
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+                                                                  framework::dataset::make("InputInfo",
+{
+    TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching data type
+    TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Window shrink
+    TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid pad/size combination
+    TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid pad/size combination
+    TensorInfo(TensorShape(15U, 13U, 5U), 1, DataType::F32, 0), // Non-rectangular Global Pooling
+    TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32, 0), // Invalid output Global Pooling
+    TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32, 0), // NOTE(review): no label; this is the 7th and last InputInfo entry
+}),
+framework::dataset::make("OutputInfo",
+{
+    TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16, 0), TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 0), TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32, 0), TensorInfo(TensorShape(25U, 16U, 2U), 1, DataType::F32, 0), TensorInfo(TensorShape(1U, 1U, 5U), 1, DataType::F32, 0), TensorInfo(TensorShape(2U, 2U, 5U), 1, DataType::F32, 0), TensorInfo(TensorShape(1U, 1U, 5U), 1, DataType::F32, 0),
+})),
+framework::dataset::make("PoolInfo",
+{
+    PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)), PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)), PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)), PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)), PoolingLayerInfo(PoolingType::AVG, 2, PadStrideInfo(1, 1, 2, 0)), PoolingLayerInfo(PoolingType::AVG, 2, PadStrideInfo(1, 1, 0, 2)), PoolingLayerInfo(PoolingType::L2, 3, PadStrideInfo(1, 1, 0, 0)), PoolingLayerInfo(PoolingType::AVG), PoolingLayerInfo(PoolingType::MAX), PoolingLayerInfo(PoolingType::AVG),
+})),
+framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false, true })), // NOTE(review): PoolInfo/Expected list 10 entries but InputInfo/OutputInfo list 7 - confirm how zip() treats the length mismatch; the trailing entries (including the only 'true' case) may never be exercised
+input_info, output_info, pool_info, expected)
+{
+    ARM_COMPUTE_EXPECT(bool(GCPoolingLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pool_info)) == expected, framework::LogLevel::ERRORS); // Status converted to bool must match the expected flag of the same row
+}
+// clang-format on
+
+template <typename T>
+using GCPoolingLayerFixture = PoolingLayerValidationFixture<GCTensor, GCAccessor, GCPoolingLayer, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, GCPoolingLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFP, framework::dataset::make("DataType",
+                                                                                                    DataType::F32))))
+{
+    // Compare the GLES target with the reference using the FP32 absolute tolerance
+    validate(GCAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, GCPoolingLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), combine(PoolingLayerDatasetFP, framework::dataset::make("DataType",
+                                                                                                        DataType::F32))))
+{
+    // Compare the GLES target with the reference using the FP32 absolute tolerance (large shapes, NIGHTLY mode only)
+    validate(GCAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, GCPoolingLayerFixture<half>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFP,
+                                                                                                   framework::dataset::make("DataType", DataType::F16))))
+{
+    // Compare the GLES target with the reference using the FP16 absolute tolerance
+    validate(GCAccessor(_target), _reference, tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, GCPoolingLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), combine(PoolingLayerDatasetFP,
+                                                                                                       framework::dataset::make("DataType", DataType::F16))))
+{
+    // Compare the GLES target with the reference using the FP16 absolute tolerance (large shapes, NIGHTLY mode only)
+    validate(GCAccessor(_target), _reference, tolerance_f16);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/GLES_COMPUTE/Scale.cpp b/tests/validation/GLES_COMPUTE/Scale.cpp
new file mode 100644
index 0000000..92c4a89
--- /dev/null
+++ b/tests/validation/GLES_COMPUTE/Scale.cpp
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCScale.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/GLES_COMPUTE/GCAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/SamplingPolicyDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/ScaleFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Data types used by the Scale configuration test (F16 only) */
+const auto ScaleDataTypes = framework::dataset::make("DataType",
+{
+    DataType::F16,
+});
+
+/** Relative tolerance used when comparing F16 outputs against the reference */
+RelativeTolerance<half> tolerance_f16(half(0.1));
+} // namespace
+
+TEST_SUITE(GC)
+TEST_SUITE(Scale)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(combine(concat(datasets::MediumShapes(), datasets::LargeShapes()), ScaleDataTypes),
+                                                                                   framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR })),
+                                                                           datasets::BorderModes()),
+                                                                   datasets::SamplingPolicies()),
+               shape, data_type, policy, border_mode, sampling_policy)
+{
+    std::mt19937                            generator(library->seed()); // Seeded from the assets library
+    std::uniform_real_distribution<float>   distribution_float(0.25, 2);
+    const float                             scale_x = distribution_float(generator);
+    const float                             scale_y = distribution_float(generator);
+    std::uniform_int_distribution<uint32_t> distribution_u8(0, 255); // uint8_t is not a permitted IntType for uniform_int_distribution, so draw a wider type and narrow below
+    const uint8_t                           constant_border_value = static_cast<uint8_t>(distribution_u8(generator));
+
+    // Create tensors: the destination is the source shape scaled by the random factors in x and y
+    GCTensor    src = create_tensor<GCTensor>(shape, data_type);
+    TensorShape shape_scaled(shape);
+    shape_scaled.set(0, shape[0] * scale_x);
+    shape_scaled.set(1, shape[1] * scale_y);
+    GCTensor dst = create_tensor<GCTensor>(shape_scaled, data_type);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    GCScale gcscale;
+    gcscale.configure(&src, &dst, policy, border_mode, constant_border_value, sampling_policy);
+
+    // Get border size depending on border mode (no border when the mode is UNDEFINED)
+    const BorderSize border_size(border_mode == BorderMode::UNDEFINED ? 0 : 1);
+
+    // Validate valid region
+    const ValidRegion dst_valid_region = calculate_valid_region_scale(*(src.info()), shape_scaled, policy, border_size, (border_mode == BorderMode::UNDEFINED));
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding (only the destination padding is checked; the source check is disabled below)
+    PaddingCalculator calculator(shape_scaled.x(), 4);
+    calculator.set_border_mode(border_mode);
+
+    //const PaddingSize read_padding(border_size);
+    const PaddingSize write_padding = calculator.required_padding(PaddingCalculator::Option::EXCLUDE_BORDER);
+    //validate(src.info()->padding(), read_padding);
+    validate(dst.info()->padding(), write_padding);
+}
+
+template <typename T>
+using GCScaleFixture = ScaleValidationFixture<GCTensor, GCAccessor, GCScale, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, GCScaleFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)),
+                                                                                                            framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR })),
+                                                                                                    datasets::BorderModes()),
+                                                                                            datasets::SamplingPolicies()))
+{
+    // Create the valid region from the fixture's source shape and the reference's shape, using a border of 1
+    TensorInfo        src_info(_shape, 1, _data_type);
+    const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, BorderSize(1), (_border_mode == BorderMode::UNDEFINED));
+
+    // Compare the GLES target with the reference inside that region using the F16 relative tolerance
+    validate(GCAccessor(_target), _reference, valid_region, tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, GCScaleFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                        DataType::F16)),
+                                                                                                                framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR })),
+                                                                                                        datasets::BorderModes()),
+                                                                                                datasets::SamplingPolicies()))
+{
+    // Create the valid region from the fixture's source shape and the reference's shape, using a border of 1
+    TensorInfo        src_info(_shape, 1, _data_type);
+    const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, BorderSize(1), (_border_mode == BorderMode::UNDEFINED));
+
+    // Compare the GLES target with the reference inside that region using the F16 relative tolerance
+    validate(GCAccessor(_target), _reference, valid_region, tolerance_f16);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h
old mode 100644
new mode 100755
index ab7dbd8..ba45968
--- a/tests/validation/Helpers.h
+++ b/tests/validation/Helpers.h
@@ -202,6 +202,20 @@
     return bounds;
 }
 
+/** Helper function to get the testing range for NormalizePlanarYUV layer.
+ *
+ * @return A pair containing the lower and upper testing bounds (-1 and 1, converted to T).
+ */
+template <typename T>
+std::pair<T, T> get_normalize_planar_yuv_layer_test_bounds()
+{
+    std::pair<T, T> bounds;
+
+    bounds = std::make_pair(-1.f, 1.f); // NOTE(review): presumably assigned rather than returned directly so that T only needs to be assignable from float (e.g. half) - confirm before simplifying
+
+    return bounds;
+}
+
 /** Convert quantized simple tensor into float using tensor quantization information.
  *
  * @param[in] src Quantized tensor.
diff --git a/tests/validation/NEON/Col2Im.cpp b/tests/validation/NEON/Col2Im.cpp
index 9125dc2..9f2415d 100644
--- a/tests/validation/NEON/Col2Im.cpp
+++ b/tests/validation/NEON/Col2Im.cpp
@@ -58,8 +58,8 @@
                framework::dataset::make("Expected", { false, false, false, false, true })),
                input_info, output_info, convolved_width, convolved_height, expected)
 {
-    bool err = bool(NECol2Im::validate(&input_info, &output_info, Size2D(convolved_width, convolved_height)));
-    ARM_COMPUTE_EXPECT(err == expected, framework::LogLevel::ERRORS);
+    bool status = bool(NECol2Im::validate(&input_info, &output_info, Size2D(convolved_width, convolved_height)));
+    ARM_COMPUTE_EXPECT(status == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
 // *INDENT-ON*
diff --git a/tests/validation/NEON/Convolution.cpp b/tests/validation/NEON/Convolution.cpp
new file mode 100644
index 0000000..5af8030
--- /dev/null
+++ b/tests/validation/NEON/Convolution.cpp
@@ -0,0 +1,381 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEConvolution.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/ConvolutionFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Tolerance value for comparing reference's output against implementation
+ *
+ * This is due to the fact that NEON target performs multiplication with reciprocal of scale,
+ * while reference performs direct division with scale.
+ */
+constexpr AbsoluteTolerance<uint8_t> tolerance_u8(1);
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(CustomConvolution)
+TEST_SUITE(CustomConvolutionSquare) /* Square filters: 3x3, 5x5, 7x7, 9x9 */
+TEST_SUITE(CustomConvolution3x3)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::U8)),
+                                                                           datasets::BorderModes()),
+                                                                   framework::dataset::make("filter_size", { 3 })),
+               shape, data_type, border_mode, filter_size)
+{
+    // Create tensors (source and destination share the same shape)
+    Tensor src = create_tensor<Tensor>(shape, data_type);
+    Tensor dst = create_tensor<Tensor>(shape, data_type);
+
+    // Create conv matrix, zero-initialised so configure() never reads indeterminate values
+    int16_t conv[9] = {};
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function (scale argument is 0)
+    NEConvolution3x3 convolution;
+    convolution.configure(&src, &dst, conv, 0, border_mode);
+
+    // Validate valid region, computed with a border of filter_size / 2
+    const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), BorderSize(filter_size / 2));
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding: destination padding is computed first, then the calculator is widened for the source reads
+    PaddingCalculator calculator(shape.x(), 8);
+    calculator.set_border_size(1);
+    calculator.set_border_mode(border_mode);
+
+    const PaddingSize dst_padding = calculator.required_padding();
+
+    calculator.set_accessed_elements(16);
+    calculator.set_access_offset(-1);
+
+    const PaddingSize src_padding = calculator.required_padding();
+
+    validate(src.info()->padding(), src_padding);
+    validate(dst.info()->padding(), dst_padding);
+}
+
+template <typename T>
+using NEConvolutionFixture = ConvolutionSquareValidationFixture<Tensor, Accessor, NEConvolution3x3, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::U8)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 3 })))
+{
+    // Validate output within the valid region computed for the border mode, using a border of (_height / 2, _width / 2) and the U8 tolerance
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::U8)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 3 })))
+{
+    // Validate output within the valid region computed for the border mode, using a border of (_height / 2, _width / 2) and the U8 tolerance
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
+}
+TEST_SUITE_END() /* Custom Convolution3x3 */
+
+TEST_SUITE(CustomConvolution5x5)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::U8)),
+                                                                           datasets::BorderModes()),
+                                                                   framework::dataset::make("filter_size", { 5 })),
+               shape, data_type, border_mode, filter_size)
+{
+    // Create tensors (source and destination share the same shape)
+    Tensor src = create_tensor<Tensor>(shape, data_type);
+    Tensor dst = create_tensor<Tensor>(shape, data_type);
+
+    // Create conv matrix, zero-initialised so configure() never reads indeterminate values
+    int16_t conv[25] = {};
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function (scale argument is 0)
+    NEConvolution5x5 convolution;
+    convolution.configure(&src, &dst, conv, 0, border_mode);
+
+    // Validate valid region, computed with a border of filter_size / 2
+    const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), BorderSize(filter_size / 2));
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding: destination padding is computed first, then the calculator is widened for the source reads
+    PaddingCalculator calculator(shape.x(), 8);
+    calculator.set_border_size(2);
+    calculator.set_border_mode(border_mode);
+
+    const PaddingSize dst_padding = calculator.required_padding();
+
+    calculator.set_accessed_elements(16);
+    calculator.set_access_offset(-2);
+
+    const PaddingSize src_padding = calculator.required_padding();
+
+    validate(src.info()->padding(), src_padding);
+    validate(dst.info()->padding(), dst_padding);
+}
+
+template <typename T>
+using NEConvolutionFixture = ConvolutionSquareValidationFixture<Tensor, Accessor, NEConvolution5x5, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::U8)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 5 })))
+{
+    // Validate output within the valid region computed for the border mode, using a border of (_height / 2, _width / 2) and the U8 tolerance
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::U8)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 5 })))
+{
+    // Validate output within the valid region computed for the border mode, using a border of (_height / 2, _width / 2) and the U8 tolerance
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
+}
+TEST_SUITE_END() /* Custom Convolution 5x5 */
+
+TEST_SUITE(CustomConvolution7x7)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::U8)),
+                                                                           datasets::BorderModes()),
+                                                                   framework::dataset::make("filter_size", { 7 })),
+               shape, data_type, border_mode, filter_size)
+{
+    // Create tensors (source and destination share the same shape)
+    Tensor src = create_tensor<Tensor>(shape, data_type);
+    Tensor dst = create_tensor<Tensor>(shape, data_type);
+
+    // Create conv matrix, zero-initialised so configure() never reads indeterminate values
+    int16_t conv[49] = {};
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function (scale argument is 0)
+    NEConvolution7x7 convolution;
+    convolution.configure(&src, &dst, conv, 0, border_mode);
+
+    // Validate valid region, computed with a border of filter_size / 2
+    const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), BorderSize(filter_size / 2));
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding: destination padding is computed first, then the calculator is widened for the source reads
+    PaddingCalculator calculator(shape.x(), 8);
+    calculator.set_border_size(3);
+    calculator.set_border_mode(border_mode);
+
+    const PaddingSize dst_padding = calculator.required_padding();
+
+    calculator.set_accessed_elements(16);
+    calculator.set_access_offset(-3);
+
+    const PaddingSize src_padding = calculator.required_padding();
+
+    validate(src.info()->padding(), src_padding);
+    validate(dst.info()->padding(), dst_padding);
+}
+
+template <typename T>
+using NEConvolutionFixture = ConvolutionSquareValidationFixture<Tensor, Accessor, NEConvolution7x7, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::U8)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 7 })))
+{
+    // Validate output within the valid region computed for the border mode, using a border of (_height / 2, _width / 2) and the U8 tolerance
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::U8)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 7 })))
+{
+    // Validate output: compare target and reference (tolerance_u8) inside the region built from the reference shape, the UNDEFINED-border flag and a border of (_height / 2, _width / 2)
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
+}
+TEST_SUITE_END() /* Custom Convolution 7x7 */
+
+TEST_SUITE(CustomConvolution9x9)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", DataType::U8)),
+                                                                           datasets::BorderModes()),
+                                                                   framework::dataset::make("filter_size", { 9 })),
+               shape, data_type, border_mode, filter_size)
+{
+    // Create tensors (source and destination share the same shape and data type)
+    Tensor src = create_tensor<Tensor>(shape, data_type);
+    Tensor dst = create_tensor<Tensor>(shape, data_type);
+
+    // Create conv matrix: 9x9 coefficients, zero-initialised so configure() is never handed indeterminate values
+    int16_t conv[81] = {};
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function (the scale argument is passed as 0)
+    NEConvolution9x9 convolution;
+    convolution.configure(&src, &dst, conv, 0, border_mode);
+
+    // Validate valid region: expectation is built from the shape, the UNDEFINED-border flag and a border of filter_size / 2
+    const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), BorderSize(filter_size / 2));
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding: dst expectation comes from PaddingCalculator(shape.x(), 8) with border size 4; the src expectation additionally sets 16 accessed elements at offset -4
+    PaddingCalculator calculator(shape.x(), 8);
+    calculator.set_border_size(4);
+    calculator.set_border_mode(border_mode);
+
+    const PaddingSize dst_padding = calculator.required_padding();
+
+    calculator.set_accessed_elements(16);
+    calculator.set_access_offset(-4);
+
+    const PaddingSize src_padding = calculator.required_padding();
+
+    validate(src.info()->padding(), src_padding);
+    validate(dst.info()->padding(), dst_padding);
+}
+
+template <typename T>
+using NEConvolutionFixture = ConvolutionSquareValidationFixture<Tensor, Accessor, NEConvolution9x9, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::U8)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                           framework::dataset::make("filter_size", { 9 })))
+{
+    // Validate output: compare target and reference (tolerance_u8) inside the region built from the reference shape, the UNDEFINED-border flag and a border of (_height / 2, _width / 2)
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::U8)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                         framework::dataset::make("filter_size", { 9 })))
+{
+    // Validate output: compare target and reference (tolerance_u8) inside the region built from the reference shape, the UNDEFINED-border flag and a border of (_height / 2, _width / 2)
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
+}
+TEST_SUITE_END() /* Custom Convolution 9x9 */
+TEST_SUITE_END() /* Custom Convolution Square */
+
+TEST_SUITE(CustomConvolutionRectangle)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType",
+                                                                                           DataType::U8)),
+                                                                                   datasets::BorderModes()),
+                                                                           framework::dataset::make("filter_width", { 3, 5, 7, 9 })),
+                                                                   framework::dataset::make("filter_height", { 3, 5, 7, 9 })),
+               shape, data_type, border_mode, filter_width, filter_height)
+{
+    // Create tensors (source and destination share the same shape and data type)
+    Tensor src = create_tensor<Tensor>(shape, data_type);
+    Tensor dst = create_tensor<Tensor>(shape, data_type);
+
+    // Create conv matrix: zero-initialised fixed storage sized for the largest filter in the dataset above (9x9); replaces an uninitialised variable-length array
+    int16_t conv[9 * 9] = {};
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function (only the leading filter_width * filter_height coefficients belong to the filter; the scale argument is 1)
+    NEConvolutionRectangle convolution;
+    convolution.configure(&src, &dst, conv, filter_width, filter_height, 1, border_mode);
+
+    // Validate valid region: expectation is built from the shape, the UNDEFINED-border flag and a border of (filter_height / 2, filter_width / 2)
+    const ValidRegion dst_valid_region = shape_to_valid_region(shape, (border_mode == BorderMode::UNDEFINED), BorderSize(filter_height / 2, filter_width / 2));
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding: one calculator is queried three times - dst (border filter_width / 2), src width (16 accessed elements, offset -(filter_width / 2)), src height (border and offset switched to filter_height / 2)
+    PaddingCalculator calculator(shape.x(), 8);
+    calculator.set_border_size(filter_width / 2);
+    calculator.set_border_mode(border_mode);
+
+    const PaddingSize dst_padding = calculator.required_padding();
+
+    calculator.set_accessed_elements(16);
+    calculator.set_access_offset(-(filter_width / 2));
+
+    const PaddingSize width_padding = calculator.required_padding();
+
+    calculator.set_border_size(filter_height / 2);
+    calculator.set_access_offset(-(filter_height / 2));
+    const PaddingSize height_padding = calculator.required_padding();
+
+    validate(src.info()->padding(), width_padding, height_padding);
+    validate(dst.info()->padding(), dst_padding);
+}
+
+template <typename T>
+using NEConvolutionFixture = ConvolutionRectangleValidationFixture<Tensor, Accessor, NEConvolutionRectangle, T>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::U8)),
+                                                                                                                   datasets::BorderModes()),
+                                                                                                                   framework::dataset::make("filter_width", { 3, 5, 7, 9 })),
+                                                                                                           framework::dataset::make("filter_height", { 3, 5, 7, 9 })))
+{
+    // Validate output: compare target and reference (tolerance_u8) inside the region built from the reference shape, the UNDEFINED-border flag and a border of (_height / 2, _width / 2)
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::U8)),
+                                                                                                                 datasets::BorderModes()),
+                                                                                                                 framework::dataset::make("filter_width", { 3, 5, 7, 9 })),
+                                                                                                         framework::dataset::make("filter_height", { 3, 5, 7, 9 })))
+{
+    // Validate output: compare target and reference (tolerance_u8) inside the region built from the reference shape, the UNDEFINED-border flag and a border of (_height / 2, _width / 2)
+    validate(Accessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_u8);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp
index 751a965..9573784 100644
--- a/tests/validation/NEON/DeconvolutionLayer.cpp
+++ b/tests/validation/NEON/DeconvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,15 +44,11 @@
 {
 constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */
 
-const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0,
-                     2)
-                     * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("ax", 1, 3) * framework::dataset::make("ay", 1, 3) * framework::dataset::make("NumKernels", { 1, 3 })
-                     *framework::dataset::make("ux", 1, 4) *framework::dataset::make("uy", 1, 4);
+const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 2)
+                     * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("ax", 0) * framework::dataset::make("ay", 0) * framework::dataset::make("NumKernels", { 1, 3 });
 
-const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0,
-                     1)
-                     * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("ax", 1, 3) * framework::dataset::make("ay", 1, 3) * framework::dataset::make("NumKernels", { 1, 3 })
-                     *framework::dataset::make("ux", 1, 4) *framework::dataset::make("uy", 1, 4);
+const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 1)
+                     * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("ax", 0) * framework::dataset::make("ay", 0) * framework::dataset::make("NumKernels", { 1, 3 });
 
 } // namespace
 
diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
index 17eaaf8..e8c7715 100644
--- a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,22 +42,24 @@
 {
 namespace
 {
-constexpr RelativeTolerance<float> tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
+constexpr RelativeTolerance<float>   tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */
 } // namespace
 
 TEST_SUITE(NEON)
-TEST_SUITE(DepthwiseConvolutionLayer)
+TEST_SUITE(DepthwiseConvLayer)
 
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
                                                                                               datasets::LargeDepthwiseConvolutionLayerDataset3x3()),
                                                                    framework::dataset::make("DataType", DataType::F32)),
-               input_shape, weights_shape, bias_shape, output_shape, info, data_type)
+               input_shape, weights_shape, output_shape, info, data_type)
 {
     // Create tensors
-    Tensor src     = create_tensor<Tensor>(input_shape, data_type);
-    Tensor dst     = create_tensor<Tensor>(output_shape, data_type);
-    Tensor weights = create_tensor<Tensor>(weights_shape, data_type);
-    Tensor bias    = create_tensor<Tensor>(bias_shape, data_type);
+    Tensor            src     = create_tensor<Tensor>(input_shape, data_type);
+    Tensor            dst     = create_tensor<Tensor>(output_shape, data_type);
+    Tensor            weights = create_tensor<Tensor>(weights_shape, data_type);
+    const TensorShape bias_shape(weights_shape[2]);
+    Tensor            bias = create_tensor<Tensor>(bias_shape, data_type);
 
     ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -120,8 +122,30 @@
     validate(Accessor(_target), _reference, tolerance_f32);
 }
 TEST_SUITE_END()
+TEST_SUITE_END()
 
 TEST_SUITE_END()
+
+template <typename T>
+using NEDepthwiseConvolutionLayerQuantizedFixture3x3 = DepthwiseConvolutionLayerValidationQuantizedFixture<Tensor, Accessor, NEDepthwiseConvolutionLayer3x3, T>;
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+{
+    validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+{
+    validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
 TEST_SUITE_END()
 
 TEST_SUITE_END()
diff --git a/tests/validation/NEON/DepthwiseSeparableConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseSeparableConvolutionLayer.cpp
index c3e6705..9816a0d 100644
--- a/tests/validation/NEON/DepthwiseSeparableConvolutionLayer.cpp
+++ b/tests/validation/NEON/DepthwiseSeparableConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,7 +27,7 @@
 #include "arm_compute/runtime/TensorAllocator.h"
 #include "tests/NEON/Accessor.h"
 #include "tests/PaddingCalculator.h"
-#include "tests/datasets/MobileNetDepthwiseSeparableConvolutionLayerDataset.h"
+#include "tests/datasets/system_tests/mobilenet/MobileNetDepthwiseSeparableConvolutionLayerDataset.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
diff --git a/tests/validation/NEON/Derivative.cpp b/tests/validation/NEON/Derivative.cpp
new file mode 100644
index 0000000..17c5315
--- /dev/null
+++ b/tests/validation/NEON/Derivative.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/NEON/functions/NEDerivative.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/GradientDimensionDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DerivativeFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(NEON)
+TEST_SUITE(Derivative)
+
+using NEDerivativeFixture = DerivativeValidationFixture<Tensor, Accessor, NEDerivative, uint8_t, int16_t>;
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                   Format::U8)),
+               shape, border_mode, format)
+{
+    // Generate a random constant value; drawn as uint32_t because std::uniform_int_distribution is undefined for 8-bit integer types, then narrowed
+    std::mt19937                            gen(library->seed());
+    std::uniform_int_distribution<uint32_t> int_dist(0, 255);
+    const auto                              constant_border_value = static_cast<uint8_t>(int_dist(gen));
+
+    // Create tensors: source uses the data type of the tested format, both gradient outputs are S16
+    Tensor src   = create_tensor<Tensor>(shape, data_type_from_format(format));
+    Tensor dst_x = create_tensor<Tensor>(shape, DataType::S16);
+    Tensor dst_y = create_tensor<Tensor>(shape, DataType::S16);
+
+    src.info()->set_format(format);
+    dst_x.info()->set_format(Format::S16);
+    dst_y.info()->set_format(Format::S16);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst_x.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst_y.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create Derivative configure function
+    NEDerivative derivative;
+    derivative.configure(&src, &dst_x, &dst_y, border_mode, constant_border_value);
+
+    // Validate valid region: both outputs are expected to match the region built from the shape, the UNDEFINED-border flag and a border of 1
+    constexpr BorderSize border_size{ 1 };
+    const ValidRegion    dst_valid_region = shape_to_valid_region(shape, border_mode == BorderMode::UNDEFINED, border_size);
+
+    validate(dst_x.info()->valid_region(), dst_valid_region);
+    validate(dst_y.info()->valid_region(), dst_valid_region);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDerivativeFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                         Format::U8)),
+                                                                                                 datasets::GradientDimensions()))
+{
+    // Validate output: the .first and .second results (named x and y here) are each compared inside a region built from their own reference shape, the UNDEFINED-border flag and a border of 1
+    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
+    validate(Accessor(_target.first), _reference.first, valid_region_x);
+
+    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
+    validate(Accessor(_target.second), _reference.second, valid_region_y);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDerivativeFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
+                                                                                                       Format::U8)),
+                                                                                               datasets::GradientDimensions()))
+{
+    // Validate output: the .first and .second results (named x and y here) are each compared inside a region built from their own reference shape, the UNDEFINED-border flag and a border of 1
+    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
+    validate(Accessor(_target.first), _reference.first, valid_region_x);
+
+    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
+    validate(Accessor(_target.second), _reference.second, valid_region_y);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/GEMM.cpp b/tests/validation/NEON/GEMM.cpp
index 811a6f0..cc8279a 100644
--- a/tests/validation/NEON/GEMM.cpp
+++ b/tests/validation/NEON/GEMM.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,6 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/NEON/functions/NEGEMM.h"
 #include "arm_compute/runtime/Tensor.h"
@@ -37,6 +38,7 @@
 #include "tests/validation/Validation.h"
 #include "tests/validation/fixtures/GEMMFixture.h"
 #include "tests/validation/fixtures/GEMMInterleave4x4Fixture.h"
+#include "tests/validation/fixtures/GEMMTranspose1xWFixture.h"
 
 namespace arm_compute
 {
@@ -61,11 +63,53 @@
 });
 
 const auto data_interleave = framework::dataset::make("M", 8, 12) * framework::dataset::make("N", 8, 12);
+const auto data_transpose  = framework::dataset::make("M", 8, 14) * framework::dataset::make("N", 7, 14);
+
 } // namespace
 
 TEST_SUITE(NEON)
 TEST_SUITE(GEMM)
 
+TEST_SUITE(TRANSPOSE_1XW)
+using NEGEMMTranspose1xW        = NESynthetizeFunctionWithZeroConstantBorder<NEGEMMTranspose1xWKernel, 4>;
+using NEGEMMTranspose1xWFixture = GEMMTranspose1xWValidationFixture<Tensor, Accessor, NEGEMMTranspose1xW, float>;
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose * framework::dataset::make("DataType", DataType::F32))
+{
+    // Validate output: target is compared with the reference without passing any tolerance
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QS8)
+using NEGEMMTranspose1xW        = NESynthetizeFunctionWithZeroConstantBorder<NEGEMMTranspose1xWKernel, 16>;
+using NEGEMMTranspose1xWFixture = GEMMTranspose1xWValidationFixedPointFixture<Tensor, Accessor, NEGEMMTranspose1xW, int8_t>;
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose *
+                       framework::dataset::make("DataType", DataType::QS8)
+                       * framework::dataset::make("FractionalBits", 1, 7))
+{
+    // Validate output: target is compared with the reference without passing any tolerance
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // QS8
+
+TEST_SUITE(QS16)
+using NEGEMMTranspose1xW        = NESynthetizeFunctionWithZeroConstantBorder<NEGEMMTranspose1xWKernel, 8>;
+using NEGEMMTranspose1xWFixture = GEMMTranspose1xWValidationFixedPointFixture<Tensor, Accessor, NEGEMMTranspose1xW, int16_t>;
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMTranspose1xWFixture, framework::DatasetMode::PRECOMMIT, data_transpose *
+                       framework::dataset::make("DataType", DataType::QS16)
+                       * framework::dataset::make("FractionalBits", 1, 14))
+{
+    // Validate output: target is compared with the reference without passing any tolerance
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // QS16
+
+TEST_SUITE_END() // Quantized
+
+TEST_SUITE_END() // TRANSPOSE_1XW
+
 TEST_SUITE(INTERLEAVE_4X4)
 using NEGEMMInterleave4x4 = NESynthetizeFunctionWithZeroConstantBorder<NEGEMMInterleave4x4Kernel, 4>;
 
diff --git a/tests/validation/NEON/HOGDescriptor.cpp b/tests/validation/NEON/HOGDescriptor.cpp
new file mode 100644
index 0000000..5f31773
--- /dev/null
+++ b/tests/validation/NEON/HOGDescriptor.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/HOG.h"
+#include "arm_compute/runtime/NEON/functions/NEHOGDescriptor.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/HOGDescriptorDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/HOGDescriptorFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr AbsoluteTolerance<float> tolerance(0.5f);          /**< Absolute tolerance passed to validate() when comparing the HOG descriptor output against the reference */
+constexpr float                    tolerance_number = 0.01f; /**< Second tolerance argument passed to validate(); presumably the allowed ratio of mismatching elements - confirm against validate() */
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(HOGDescriptor)
+
+// *INDENT-OFF*
+// clang-format off
+using NEHOGDescriptorFixture = HOGDescriptorValidationFixture<Tensor, HOG, Accessor, NEHOGDescriptor, uint8_t, float>;
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEHOGDescriptorFixture, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(
+                       datasets::SmallHOGDescriptorDataset(),
+                       framework::dataset::make("Format", Format::U8)),
+                       framework::dataset::make("BorderMode", {BorderMode::CONSTANT, BorderMode::REPLICATE})))
+{
+    // Validate output: compare target and reference using both file-local tolerances (tolerance, tolerance_number)
+    validate(Accessor(_target), _reference, tolerance, tolerance_number);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEHOGDescriptorFixture, framework::DatasetMode::NIGHTLY,
+                       combine(combine(
+                       datasets::LargeHOGDescriptorDataset(),
+                       framework::dataset::make("Format", Format::U8)),
+                       framework::dataset::make("BorderMode", {BorderMode::CONSTANT, BorderMode::REPLICATE})))
+{
+    // Validate output: compare target and reference using both file-local tolerances (tolerance, tolerance_number)
+    validate(Accessor(_target), _reference, tolerance, tolerance_number);
+}
+// clang-format on
+// *INDENT-ON*
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/HarrisCorners.cpp b/tests/validation/NEON/HarrisCorners.cpp
index fa8d3cb..b8d7917 100644
--- a/tests/validation/NEON/HarrisCorners.cpp
+++ b/tests/validation/NEON/HarrisCorners.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,6 +29,7 @@
 #include "tests/NEON/ArrayAccessor.h"
 #include "tests/PaddingCalculator.h"
 #include "tests/datasets/BorderModeDataset.h"
+#include "tests/datasets/ImageFileDatasets.h"
 #include "tests/datasets/ShapeDatasets.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
@@ -107,7 +108,8 @@
 template <typename T>
 using NEHarrisCornersFixture = HarrisCornersValidationFixture<Tensor, Accessor, KeyPointArray, NEHarrisCorners, T>;
 
-FIXTURE_DATA_TEST_CASE(RunSmall, NEHarrisCornersFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::Small2DShapes(), data), framework::dataset::make("Format", Format::U8)))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEHarrisCornersFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallImageFiles(), data), framework::dataset::make("Format",
+                                                                                                             Format::U8)))
 {
     // Validate output
     ArrayAccessor<KeyPoint> array(_target);
@@ -120,7 +122,7 @@
                        allowed_mismatch_percentage);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, NEHarrisCornersFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large2DShapes(), data), framework::dataset::make("Format", Format::U8)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEHarrisCornersFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeImageFiles(), data), framework::dataset::make("Format", Format::U8)))
 {
     // Validate output
     ArrayAccessor<KeyPoint> array(_target);
diff --git a/tests/validation/NEON/Im2Col.cpp b/tests/validation/NEON/Im2Col.cpp
index 4faa7d7..f8e474b 100644
--- a/tests/validation/NEON/Im2Col.cpp
+++ b/tests/validation/NEON/Im2Col.cpp
@@ -56,8 +56,8 @@
                framework::dataset::make("Expected", { false, false, false, false, true })),
                input_info, output_info, has_bias, expected)
 {
-    bool err = bool(NEIm2Col::validate(&input_info, &output_info, Size2D(3U, 3U), PadStrideInfo(), has_bias));
-    ARM_COMPUTE_EXPECT(err == expected, framework::LogLevel::ERRORS);
+    bool status = bool(NEIm2Col::validate(&input_info, &output_info, Size2D(3U, 3U), PadStrideInfo(), has_bias));
+    ARM_COMPUTE_EXPECT(status == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
 // *INDENT-ON*
diff --git a/tests/validation/NEON/PoolingLayer.cpp b/tests/validation/NEON/PoolingLayer.cpp
index a71f5f2..e1c4ed5 100644
--- a/tests/validation/NEON/PoolingLayer.cpp
+++ b/tests/validation/NEON/PoolingLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -53,12 +53,18 @@
                                                    framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })),
                                            framework::dataset::make("ExcludePadding", { false }));
 
+/** Input data set for the quantized asymmetric (QASYMM8) tests: pooling type x pooling size x pad/stride info x exclude-padding flag */
+const auto PoolingLayerDatasetQASYMM8 = combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), framework::dataset::make("PoolingSize", { 2, 3, 9 })),
+                                                        framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })),
+                                                framework::dataset::make("ExcludePadding", { true, false }));
+
 constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for float types */
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-constexpr AbsoluteTolerance<float> tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for float types */
-#endif                                                   /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-constexpr AbsoluteTolerance<float> tolerance_qs8(0);     /**< Tolerance value for comparing reference's output against implementation's output for quantized input */
-constexpr AbsoluteTolerance<float> tolerance_qs16(0);    /**< Tolerance value for comparing reference's output against implementation's output for quantized input */
+constexpr AbsoluteTolerance<float> tolerance_f16(0.01f);   /**< Tolerance value for comparing reference's output against implementation's output for float types */
+#endif                                                     /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+constexpr AbsoluteTolerance<float>   tolerance_qs8(0);     /**< Tolerance value for comparing reference's output against implementation's output for quantized input */
+constexpr AbsoluteTolerance<float>   tolerance_qs16(0);    /**< Tolerance value for comparing reference's output against implementation's output for quantized input */
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for 8-bit asymmetric type */
 } // namespace
 
 TEST_SUITE(NEON)
@@ -146,7 +152,7 @@
 template <typename T>
 using NEPoolingLayerFixedPointFixture = PoolingLayerValidationFixedPointFixture<Tensor, Accessor, NEPoolingLayer, T>;
 
-TEST_SUITE(Quantized)
+TEST_SUITE(FixedPoint)
 TEST_SUITE(QS8)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQS,
                                                                                                                        framework::dataset::make("DataType", DataType::QS8))),
@@ -182,6 +188,31 @@
 TEST_SUITE_END()
 TEST_SUITE_END()
 
+TEST_SUITE(Quantized)
+/** Binds the generic quantized pooling validation fixture to the NEON tensor, accessor and function types */
+template <typename T>
+using NEPoolingLayerQuantizedFixture = PoolingLayerValidationQuantizedFixture<Tensor, Accessor, NEPoolingLayer, T>;
+
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQASYMM8,
+                                                                                                                       framework::dataset::make("DataType", DataType::QASYMM8))),
+                                                                                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127),
+                                                                                                                       QuantizationInfo(7.f / 255, 123)
+                                                                                                                                                            })))
+{
+    // Validate output against the reference; tolerance_qasymm8 allows an absolute difference of 1
+    validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetQASYMM8,
+                                                                                                                   framework::dataset::make("DataType", DataType::QASYMM8))),
+                                                                                                                   framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255, 0) })))
+{
+    // Validate output against the reference; tolerance_qasymm8 allows an absolute difference of 1
+    validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
 TEST_SUITE_END()
 TEST_SUITE_END()
 } // namespace validation
diff --git a/tests/validation/Validation.cpp b/tests/validation/Validation.cpp
index ebca193..d01ac12 100644
--- a/tests/validation/Validation.cpp
+++ b/tests/validation/Validation.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -170,6 +170,14 @@
     ARM_COMPUTE_EXPECT_EQUAL(padding.left, reference.left, framework::LogLevel::ERRORS);
 }
 
+void validate(const arm_compute::PaddingSize &padding, const arm_compute::PaddingSize &width_reference, const arm_compute::PaddingSize &height_reference)
+{
+    ARM_COMPUTE_EXPECT_EQUAL(padding.top, height_reference.top, framework::LogLevel::ERRORS); // top and bottom are checked against height_reference
+    ARM_COMPUTE_EXPECT_EQUAL(padding.right, width_reference.right, framework::LogLevel::ERRORS); // left and right are checked against width_reference
+    ARM_COMPUTE_EXPECT_EQUAL(padding.bottom, height_reference.bottom, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT_EQUAL(padding.left, width_reference.left, framework::LogLevel::ERRORS);
+}
+
 void validate(const IAccessor &tensor, const void *reference_value)
 {
     ARM_COMPUTE_ASSERT(reference_value != nullptr);
diff --git a/tests/validation/Validation.h b/tests/validation/Validation.h
index 1f81d38..b12d7de 100644
--- a/tests/validation/Validation.h
+++ b/tests/validation/Validation.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -163,6 +163,12 @@
  */
 void validate(const arm_compute::PaddingSize &padding, const arm_compute::PaddingSize &reference);
 
+/** Validate padding.
+ *
+ * Top and bottom padding are compared against @p height_reference, left and right padding against @p width_reference.
+ */
+void validate(const arm_compute::PaddingSize &padding, const arm_compute::PaddingSize &width_reference, const arm_compute::PaddingSize &height_reference);
+
 /** Validate tensors.
  *
  * - Dimensionality has to be the same.
diff --git a/tests/validation/fixtures/ConvolutionFixture.h b/tests/validation/fixtures/ConvolutionFixture.h
new file mode 100644
index 0000000..85070cf
--- /dev/null
+++ b/tests/validation/fixtures/ConvolutionFixture.h
@@ -0,0 +1,276 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_CONVOLUTION_FIXTURE
+#define ARM_COMPUTE_TEST_CONVOLUTION_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/Convolution.h"
+
+#include <random>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/** Base fixture that draws a random convolution matrix and runs both the function under test and the reference with it. */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class ConvolutionValidationFixture : public framework::Fixture
+{
+protected:
+    /** Draw border value, scale and matrix from the library seed, then compute target and reference. @p width and @p height must each be 3, 5, 7 or 9. */
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type, BorderMode border_mode, const unsigned int width, const unsigned int height, const bool is_separable = false)
+    {
+        std::mt19937                                gen(library->seed());
+        std::uniform_int_distribution<unsigned int> distribution(0, 255); // 8-bit result types are not permitted by the standard, so narrow afterwards
+        const uint8_t                               constant_border_value = static_cast<uint8_t>(distribution(gen));
+
+        // Generate random scale value between 1 and 255.
+        std::uniform_int_distribution<unsigned int> distribution_scale(1, 255);
+        const uint32_t                              scale = distribution_scale(gen);
+
+        ARM_COMPUTE_ERROR_ON(3 != width && 5 != width && 7 != width && 9 != width);
+        ARM_COMPUTE_ERROR_ON(3 != height && 5 != height && 7 != height && 9 != height);
+
+        int16_t conv[width * height];
+        _border_mode = border_mode; // remember the border mode used for this run
+        _width       = width;
+        _height      = height;
+
+        if(is_separable)
+        {
+            create_separable_conv(conv);
+        }
+        else
+        {
+            create_conv(conv);
+        }
+
+        _target    = compute_target(shape, data_type, conv, scale, border_mode, constant_border_value);
+        _reference = compute_reference(shape, data_type, conv, scale, border_mode, constant_border_value);
+    }
+
+    /** Fill the _width * _height entries of @p conv with random values spanning the whole int16_t range. */
+    void create_conv(int16_t *conv)
+    {
+        std::mt19937                           gen(library->seed());
+        std::uniform_int_distribution<int16_t> distribution_int16(-32768, 32767);
+
+        for(unsigned int i = 0; i < _width * _height; ++i)
+        {
+            conv[i] = distribution_int16(gen);
+        }
+    }
+
+    /** Fill @p conv with the outer product of a random column and a random row vector (assumes _height rows of _width values - TODO confirm layout). */
+    void create_separable_conv(int16_t *conv)
+    {
+        std::mt19937 gen(library->seed());
+        // Set it between -128 and 127 to ensure the matrix does not overflow
+        std::uniform_int_distribution<int16_t> distribution_int16(-128, 127);
+
+        const unsigned int length = (_width > _height) ? _width : _height; // longer dimension, keeps the shared fill loop in bounds when _width != _height
+        int16_t            conv_row[length];
+        int16_t            conv_col[length];
+
+        conv_row[0] = conv_col[0] = 1;
+        for(unsigned int i = 1; i < length; ++i)
+        {
+            conv_row[i] = distribution_int16(gen);
+            conv_col[i] = distribution_int16(gen);
+        }
+
+        // Outer product: entry (y, x) is conv_col[y] * conv_row[x]
+        for(unsigned int y = 0; y < _height; ++y)
+        {
+            for(unsigned int x = 0; x < _width; ++x)
+            {
+                conv[y * _width + x] = conv_col[y] * conv_row[x];
+            }
+        }
+    }
+    /** Fill @p tensor with uniformly distributed values using @p i as seed offset. */
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        library->fill_tensor_uniform(tensor, i);
+    }
+
+    /** Run the reference convolution on an input filled with seed offset 0; @p data_type must be U8. */
+    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
+    {
+        ARM_COMPUTE_ERROR_ON(data_type != DataType::U8);
+
+        SimpleTensor<T> src{ shape, data_type };
+        fill(src, 0);
+
+        return reference::convolution<T>(src, conv, scale, border_mode, constant_border_value, _width, _height);
+    }
+    /** Run the function under test with the given matrix and return its output; implemented by the derived fixtures. */
+    virtual TensorType compute_target(const TensorShape &shape, DataType data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value) = 0;
+
+    BorderMode      _border_mode{};
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+    unsigned int    _width{};
+    unsigned int    _height{};
+};
+
+/** Fixture for square matrices: setup() forwards @p width as both matrix dimensions and the function is configured without explicit dimensions. */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class ConvolutionSquareValidationFixture : public ConvolutionValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type, BorderMode border_mode, const unsigned int width)
+    {
+        ConvolutionValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, border_mode, width, width);
+    }
+
+protected:
+    /** Configure, allocate, fill and run the function under test, returning its output tensor. */
+    TensorType compute_target(const TensorShape &shape, DataType data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value) override
+    {
+        // Create tensors
+        TensorType src = create_tensor<TensorType>(shape, data_type);
+        TensorType dst = create_tensor<TensorType>(shape, data_type);
+
+        // Create and configure function; both tensors are expected to remain resizable afterwards
+        FunctionType convolution;
+        convolution.configure(&src, &dst, conv, scale, border_mode, constant_border_value);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors; once allocated they must no longer be resizable
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors (seed offset 0 for the input, 1 for the output)
+        this->fill(AccessorType(src), 0);
+        this->fill(AccessorType(dst), 1);
+
+        // Compute function and hand back its output
+        convolution.run();
+        return dst;
+    }
+};
+
+/** Fixture for separable matrices: setup() forwards @p width as both matrix dimensions and requests a separable matrix from the base fixture. */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class ConvolutionSeparableValidationFixture : public ConvolutionValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type, BorderMode border_mode, const unsigned int width)
+    {
+        ConvolutionValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, border_mode, width, width, true);
+    }
+
+protected:
+    /** Configure, allocate, fill and run the function under test, returning its output tensor. */
+    TensorType compute_target(const TensorShape &shape, DataType data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value) override
+    {
+        // Create tensors
+        TensorType src = create_tensor<TensorType>(shape, data_type);
+        TensorType dst = create_tensor<TensorType>(shape, data_type);
+
+        // Create and configure function; both tensors are expected to remain resizable afterwards
+        FunctionType convolution;
+        convolution.configure(&src, &dst, conv, scale, border_mode, constant_border_value);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors; once allocated they must no longer be resizable
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors (seed offset 0 for the input, 1 for the output)
+        this->fill(AccessorType(src), 0);
+        this->fill(AccessorType(dst), 1);
+
+        // Compute function and hand back its output
+        convolution.run();
+        return dst;
+    }
+};
+
+/** Fixture for rectangular matrices: width and height are chosen independently and are passed on to the function's configure(). */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class ConvolutionRectangleValidationFixture : public ConvolutionValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type, BorderMode border_mode, const unsigned int width, const unsigned int height)
+    {
+        ConvolutionValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, border_mode, width, height);
+    }
+
+protected:
+    /** Configure, allocate, fill and run the function under test, returning its output tensor. */
+    TensorType compute_target(const TensorShape &shape, DataType data_type, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value) override
+    {
+        // Create tensors
+        TensorType src = create_tensor<TensorType>(shape, data_type);
+        TensorType dst = create_tensor<TensorType>(shape, data_type);
+
+        // Create and configure function with the matrix dimensions stored by the base fixture
+        FunctionType convolution;
+        convolution.configure(&src, &dst, conv, this->_width, this->_height, scale, border_mode, constant_border_value);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors; once allocated they must no longer be resizable
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors (seed offset 0 for the input, 1 for the output)
+        this->fill(AccessorType(src), 0);
+        this->fill(AccessorType(dst), 1);
+
+        // Compute function and hand back its output
+        convolution.run();
+        return dst;
+    }
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_CONVOLUTION_FIXTURE */
diff --git a/tests/validation/fixtures/DeconvolutionLayerFixture.h b/tests/validation/fixtures/DeconvolutionLayerFixture.h
index e98f5e9..f2455f3 100644
--- a/tests/validation/fixtures/DeconvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DeconvolutionLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,20 +43,15 @@
 class DeconvolutionLayerFixtureBase : public framework::Fixture
 {
 public:
-    /*
-     *
-     * @param[in] a The number of zeros added to right and bottom edges of the input.
-     * @param[in] u How much to scale the X and Y axis.
-     */
     template <typename...>
     void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info,
-               const std::pair<unsigned int, unsigned int> &a, const std::pair<unsigned int, unsigned int> &u, DataType data_type, int fractional_bits)
+               const std::pair<unsigned int, unsigned int> &inner_border, DataType data_type, int fractional_bits)
     {
         _fractional_bits = fractional_bits;
         _data_type       = data_type;
 
-        _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, a, u, data_type, fractional_bits);
-        _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, a, data_type, fractional_bits);
+        _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, inner_border, data_type, fractional_bits);
+        _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, inner_border, data_type, fractional_bits);
     }
 
 protected:
@@ -75,13 +70,9 @@
                 library->fill_tensor_uniform(tensor, i);
         }
     }
-    /*
-     *
-     * @param[in] a The number of zeros added to right and bottom edges of the input.
-     * @param[in] u How much to scale the X and Y axis.
-     */
+
     TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape,
-                              const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> &a, const std::pair<float, float> &u, DataType data_type, int fixed_point_position)
+                              const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> &inner_border, DataType data_type, int fixed_point_position)
     {
         // Create tensors
         TensorType src     = create_tensor<TensorType>(input_shape, data_type, 1, fixed_point_position);
@@ -91,7 +82,7 @@
 
         // Create and configure function
         FunctionType conv;
-        conv.configure(&src, &weights, &bias, &dst, info, a.first, a.second, u.first, u.second);
+        conv.configure(&src, &weights, &bias, &dst, info, inner_border.first, inner_border.second);
 
         ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -121,7 +112,7 @@
     }
 
     SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape,
-                                      const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> a, DataType data_type, int fixed_point_position)
+                                      const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> inner_border, DataType data_type, int fixed_point_position)
     {
         // Create reference
         SimpleTensor<T> src{ input_shape, data_type, 1, fixed_point_position };
@@ -133,7 +124,7 @@
         fill(weights, 1);
         fill(bias, 2);
 
-        return reference::deconvolution_layer<T>(src, weights, bias, output_shape, info, a);
+        return reference::deconvolution_layer<T>(src, weights, bias, output_shape, info, inner_border);
     }
 
     TensorType      _target{};
@@ -148,18 +139,16 @@
 public:
     template <typename...>
     void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int padx, unsigned int pady,
-               unsigned int ax, unsigned int ay, unsigned int ux, unsigned int uy, unsigned int num_kernels, DataType data_type)
+               unsigned int inner_border_right, unsigned int inner_border_top, unsigned int num_kernels, DataType data_type)
     {
         ARM_COMPUTE_ERROR_ON_MSG(kernel_size_x != kernel_size_y, "Only square kernels supported");
         const TensorShape   weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels);
         const TensorShape   bias_shape(num_kernels);
         const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL);
-        const std::pair<unsigned int, unsigned int> a(ax, ay);
-        const std::pair<float, float>               u(ux, uy);
-        auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, padx, pady, a.first, a.second, u.first, u.second,
-                                                       DimensionRoundingType::CEIL);
+        const std::pair<unsigned int, unsigned int> inner_border(inner_border_right, inner_border_top);
+        auto        out_dim      = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, padx, pady, inner_border.first, inner_border.second, sx, sy);
         TensorShape output_shape = deconvolution_output_shape(out_dim, input_shape, weights_shape);
-        DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, a, u, data_type, 0);
+        DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, inner_border, data_type, 0);
     }
 };
 
diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
index 4426215..df5436f 100644
--- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -34,6 +34,8 @@
 #include "tests/validation/Helpers.h"
 #include "tests/validation/reference/DepthwiseConvolutionLayer.h"
 
+#include "utils/Utils.h"
+
 #include <random>
 
 namespace arm_compute
@@ -50,12 +52,12 @@
 
 public:
     template <typename...>
-    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info)
+    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info)
     {
         _quantization_info = quantization_info;
         _data_type         = data_type;
-
-        const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
+        const TensorShape biases_shape(weights_shape[2]);
+        const DataType    bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
 
         _target    = compute_target(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info);
         _reference = compute_reference(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info);
@@ -74,6 +76,7 @@
                 break;
             }
             case DataType::F32:
+            case DataType::F16:
             {
                 std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
                 library->fill(tensor, distribution, i);
@@ -81,7 +84,7 @@
             }
             case DataType::S32:
             {
-                std::uniform_int_distribution<int32_t> distribution(-1000, 1000);
+                std::uniform_int_distribution<int32_t> distribution(-100, 100);
                 library->fill(tensor, distribution, i);
                 break;
             }
@@ -135,7 +138,7 @@
     {
         SimpleTensor<T>     src{ in_shape, data_type, 1, 0, quantization_info };
         SimpleTensor<T>     weights{ weights_shape, data_type, 1, 0, quantization_info };
-        SimpleTensor<TBias> biases{ biases_shape, data_type, 1, 0, quantization_info };
+        SimpleTensor<TBias> biases{ biases_shape, bias_data_type, 1, 0, quantization_info };
 
         fill(src, 0);
         fill(weights, 1);
@@ -155,9 +158,9 @@
 {
 public:
     template <typename...>
-    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type)
+    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type)
     {
-        DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info,
+        DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, weights_shape, out_shape, pad_stride_info,
                                                                                                             data_type, QuantizationInfo());
     }
 };
@@ -167,9 +170,9 @@
 {
 public:
     template <typename...>
-    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info)
+    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info)
     {
-        DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info,
+        DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, weights_shape, out_shape, pad_stride_info,
                                                                                                             data_type, quantization_info);
     }
 };
diff --git a/tests/validation/fixtures/DerivativeFixture.h b/tests/validation/fixtures/DerivativeFixture.h
new file mode 100644
index 0000000..2df3340
--- /dev/null
+++ b/tests/validation/fixtures/DerivativeFixture.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_DERIVATIVE_FIXTURE
+#define ARM_COMPUTE_TEST_DERIVATIVE_FIXTURE
+
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/Types.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/Derivative.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/** Fixture comparing a derivative function against the reference for the X gradient, the Y gradient or both. */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename U>
+class DerivativeValidationFixture : public framework::Fixture
+{
+public:
+    /** Draw a constant border value from the library seed, then compute the target and reference gradient pairs. */
+    template <typename...>
+    void setup(TensorShape shape, BorderMode border_mode, Format format, GradientDimension gradient_dimension)
+    {
+        // Generate a random constant value; drawn as unsigned int because the standard does not permit 8-bit result types
+        std::mt19937                                gen(library->seed());
+        std::uniform_int_distribution<unsigned int> int_dist(0, 255);
+        const uint8_t                               constant_border_value = static_cast<uint8_t>(int_dist(gen));
+
+        _border_mode = border_mode;
+        _target      = compute_target(shape, border_mode, format, constant_border_value, gradient_dimension);
+        _reference   = compute_reference(shape, border_mode, format, constant_border_value, gradient_dimension);
+    }
+
+protected:
+    /** Fill @p tensor with uniformly distributed values (seed offset 0). */
+    template <typename V>
+    void fill(V &&tensor)
+    {
+        library->fill_tensor_uniform(tensor, 0);
+    }
+
+    /** Fill @p tensor via fill_tensor_uniform with both extra arguments set to 0 (presumably the range bounds, i.e. all zeros - TODO confirm). */
+    template <typename V>
+    void fill_zero(V &&tensor)
+    {
+        library->fill_tensor_uniform(tensor, 0, static_cast<U>(0), static_cast<U>(0));
+    }
+
+    /** Run the function under test and return (dst_x, dst_y); a gradient that was not requested is passed to configure() as nullptr. */
+    std::pair<TensorType, TensorType> compute_target(const TensorShape &shape, BorderMode border_mode, Format format, uint8_t constant_border_value, GradientDimension gradient_dimension)
+    {
+        // Create tensors
+        TensorType src   = create_tensor<TensorType>(shape, data_type_from_format(format));
+        TensorType dst_x = create_tensor<TensorType>(shape, data_type_from_format(Format::S16));
+        TensorType dst_y = create_tensor<TensorType>(shape, data_type_from_format(Format::S16));
+
+        src.info()->set_format(format);
+        dst_x.info()->set_format(Format::S16);
+        dst_y.info()->set_format(Format::S16);
+
+        FunctionType derivative;
+        switch(gradient_dimension)
+        {
+            case GradientDimension::GRAD_X:
+                derivative.configure(&src, &dst_x, nullptr, border_mode, constant_border_value);
+                break;
+            case GradientDimension::GRAD_Y:
+                derivative.configure(&src, nullptr, &dst_y, border_mode, constant_border_value);
+                break;
+            case GradientDimension::GRAD_XY:
+                derivative.configure(&src, &dst_x, &dst_y, border_mode, constant_border_value);
+                break;
+            default:
+                ARM_COMPUTE_ERROR("Gradient dimension not supported");
+        }
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst_x.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst_y.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors; once allocated they must no longer be resizable
+        src.allocator()->allocate();
+        dst_x.allocator()->allocate();
+        dst_y.allocator()->allocate();
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst_x.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst_y.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors; both outputs are zero-filled regardless of which gradient was requested
+        fill(AccessorType(src));
+        fill_zero(AccessorType(dst_x));
+        fill_zero(AccessorType(dst_y));
+
+        // Compute function and hand back both outputs
+        derivative.run();
+        return std::make_pair(std::move(dst_x), std::move(dst_y));
+    }
+
+    /** Run the reference derivative on an input filled the same way as the target input. */
+    std::pair<SimpleTensor<U>, SimpleTensor<U>> compute_reference(const TensorShape &shape, BorderMode border_mode, Format format, uint8_t constant_border_value, GradientDimension gradient_dimension)
+    {
+        SimpleTensor<T> src{ shape, format };
+        fill(src);
+
+        return reference::derivative<U>(src, border_mode, constant_border_value, gradient_dimension);
+    }
+
+    BorderMode _border_mode{ BorderMode::UNDEFINED };
+    std::pair<TensorType, TensorType>           _target{};
+    std::pair<SimpleTensor<U>, SimpleTensor<U>> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_DERIVATIVE_FIXTURE */
diff --git a/tests/validation/fixtures/FlattenLayerFixture.h b/tests/validation/fixtures/FlattenLayerFixture.h
index 436138b..3de0ba4 100644
--- a/tests/validation/fixtures/FlattenLayerFixture.h
+++ b/tests/validation/fixtures/FlattenLayerFixture.h
@@ -76,9 +76,7 @@
     TensorType compute_target(const TensorShape &shape, DataType data_type)
     {
         TensorShape shape_flatten(shape);
-        shape_flatten.set(0, shape[0] * shape[1] * shape[2]);
-        shape_flatten.remove_dimension(1);
-        shape_flatten.remove_dimension(1);
+        shape_flatten.collapse(3);
 
         // Create tensors
         TensorType src = create_tensor<TensorType>(shape, data_type, 1, _fractional_bits);
diff --git a/tests/validation/fixtures/GEMMTranspose1xWFixture.h b/tests/validation/fixtures/GEMMTranspose1xWFixture.h
new file mode 100644
index 0000000..d83d5e9
--- /dev/null
+++ b/tests/validation/fixtures/GEMMTranspose1xWFixture.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_GEMM_TRANSPOSE_1XW_FIXTURE
+#define ARM_COMPUTE_TEST_GEMM_TRANSPOSE_1XW_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/GEMMTranspose1xW.h"
+
+#include <random>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class GEMMTranspose1xWValidationFixedPointFixture : public framework::Fixture // Runs FunctionType on a filled 2D tensor and keeps its output next to reference::gemm_transpose_1xW<T>
+{
+public:
+    template <typename...>
+    void setup(size_t x, size_t y, DataType data_type, int fractional_bits) // x, y: dimensions of the input shape; fractional_bits is forwarded as the fixed point position
+    {
+        _fractional_bits = fractional_bits;
+        _data_type       = data_type;
+        const TensorShape  shape_a(x, y);
+        const unsigned int transpose_w = 16 / data_size_from_type(data_type); // 16 divided by the element size (presumably elements per 16-byte block - confirm)
+        const TensorShape  shape_b(static_cast<size_t>(y * transpose_w), static_cast<size_t>(std::ceil(x / static_cast<float>(transpose_w)))); // output shape: (y * transpose_w, ceil(x / transpose_w)); NOTE(review): std::ceil is used but <cmath> is not included directly by this header
+        _target    = compute_target(shape_a, shape_b, data_type, fractional_bits);
+        _reference = compute_reference(shape_a, shape_b, data_type, fractional_bits);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i) // i is handed to the library fill calls (the seed offset parameter of fill_tensor_uniform)
+    {
+        switch(tensor.data_type())
+        {
+            case DataType::F16:
+            case DataType::F32:
+            {
+                std::uniform_real_distribution<> distribution(-1.f, 1.f); // floating point tensors draw from [-1, 1)
+                library->fill(tensor, distribution, i);
+                break;
+            }
+            default: // every other data type falls back to the library's uniform fill
+                library->fill_tensor_uniform(tensor, i);
+                break;
+        }
+    }
+
+    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, DataType data_type, int fixed_point_position) // returns tensor b after the function has run
+    {
+        // Create the input (a) and output (b) tensors
+        TensorType a = create_tensor<TensorType>(shape_a, data_type, 1, fixed_point_position);
+        TensorType b = create_tensor<TensorType>(shape_b, data_type, 1, fixed_point_position);
+
+        // Create and configure function
+        FunctionType f;
+        f.configure(&a, &b);
+
+        ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS); // both tensors are expected to be resizable until they are allocated
+        ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        a.allocator()->allocate();
+        b.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!a.info()->is_resizable(), framework::LogLevel::ERRORS); // after allocation they are expected to be non-resizable
+        ARM_COMPUTE_EXPECT(!b.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(a), 0);
+        fill(AccessorType(b), 1); // b is the returned tensor, so this pre-fill is presumably overwritten by run() - confirm
+
+        // Run the function configured above on (a, b)
+        f.run();
+
+        return b;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, DataType data_type, int fixed_point_position) // shape_b is not referenced in this body
+    {
+        // Create reference
+        SimpleTensor<T> a{ shape_a, data_type, 1, fixed_point_position };
+
+        // Fill reference with the same fill() argument (0) as the target input a
+        fill(a, 0);
+
+        return reference::gemm_transpose_1xW<T>(a);
+    }
+
+    TensorType      _target{}; // result of compute_target(), assigned in setup()
+    SimpleTensor<T> _reference{}; // result of compute_reference(), assigned in setup()
+    int             _fractional_bits{}; // copy of setup()'s fractional_bits argument
+    DataType        _data_type{}; // copy of setup()'s data_type argument
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class GEMMTranspose1xWValidationFixture : public GEMMTranspose1xWValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T> // Variant whose setup() takes no fractional-bits argument
+{
+public:
+    template <typename...>
+    void setup(size_t x, size_t y, DataType data_type)
+    {
+        GEMMTranspose1xWValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(x, y, data_type, 0); // forwards to the base setup() with fractional_bits = 0
+    }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_GEMM_TRANSPOSE_1XW_FIXTURE */
diff --git a/tests/validation/fixtures/HOGDescriptorFixture.h b/tests/validation/fixtures/HOGDescriptorFixture.h
new file mode 100644
index 0000000..cabee63
--- /dev/null
+++ b/tests/validation/fixtures/HOGDescriptorFixture.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_HOG_DESCRIPTOR_FIXTURE
+#define ARM_COMPUTE_TEST_HOG_DESCRIPTOR_FIXTURE
+
+#include "arm_compute/core/HOGInfo.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/HOGDescriptor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename HOGType, typename AccessorType, typename FunctionType, typename T, typename U>
+class HOGDescriptorValidationFixture : public framework::Fixture // Runs FunctionType on a library image and keeps its output next to reference::hog_descriptor<U>
+{
+public:
+    template <typename...>
+    void setup(std::string image, HOGInfo hog_info, Format format, BorderMode border_mode) // image: name of a library image; border_mode must not be UNDEFINED
+    {
+        // Only defined borders supported
+        ARM_COMPUTE_ERROR_ON(border_mode == BorderMode::UNDEFINED);
+
+        // Generate a random constant value in [0, 255]; drawn as unsigned int because 8-bit types are not valid IntType arguments for std::uniform_int_distribution
+        std::mt19937                                gen(library->seed());
+        std::uniform_int_distribution<unsigned int> int_dist(0, 255);
+        const T                                     constant_border_value = static_cast<T>(int_dist(gen));
+
+        _target    = compute_target(image, format, border_mode, constant_border_value, hog_info);
+        _reference = compute_reference(image, format, border_mode, constant_border_value, hog_info);
+    }
+
+protected:
+    template <typename V>
+    void fill(V &&tensor, const std::string image, Format format) // fills tensor from the named library image
+    {
+        library->fill(tensor, image, format);
+    }
+
+    template <typename V, typename D>
+    void fill(V &&tensor, int i, D max) // uniform library fill with the bounds 0 and max; i is passed as the second argument (presumably the seed offset - confirm)
+    {
+        library->fill_tensor_uniform(tensor, i, static_cast<D>(0), max);
+    }
+
+    TensorType compute_target(const std::string image, Format &format, BorderMode &border_mode, T constant_border_value, const HOGInfo &hog_info) // returns dst after the function has run
+    {
+        // Get image shape for src tensor
+        TensorShape shape = library->get_image_shape(image);
+
+        // Create tensor info for HOG descriptor
+        TensorInfo tensor_info_hog_descriptor(hog_info, shape.x(), shape.y());
+
+        // Create HOG from the individual fields of hog_info
+        HOGType hog = create_HOG<HOGType>(hog_info.cell_size(),
+                                          hog_info.block_size(),
+                                          hog_info.detection_window_size(),
+                                          hog_info.block_stride(),
+                                          hog_info.num_bins(),
+                                          hog_info.normalization_type(),
+                                          hog_info.l2_hyst_threshold(),
+                                          hog_info.phase_type());
+
+        // Create tensors; dst takes its shape and channel count from the descriptor tensor info
+        TensorType src = create_tensor<TensorType>(shape, data_type_from_format(format));
+        TensorType dst = create_tensor<TensorType>(tensor_info_hog_descriptor.tensor_shape(), DataType::F32, tensor_info_hog_descriptor.num_channels());
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Create and configure function
+        FunctionType hog_descriptor;
+        hog_descriptor.configure(&src, &dst, &hog, border_mode, constant_border_value);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); // still expected to be resizable after configure()
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        const T max = std::numeric_limits<T>::max(); // upper bound used for the dst pre-fill below
+
+        // Fill tensors
+        fill(AccessorType(src), image, format);
+        fill(AccessorType(dst), 1, static_cast<U>(max)); // dst is the returned tensor, so this pre-fill is presumably overwritten by run() - confirm
+
+        // Compute function
+        hog_descriptor.run();
+
+        return dst;
+    }
+
+    SimpleTensor<U> compute_reference(const std::string image, Format format, BorderMode border_mode, T constant_border_value, const HOGInfo &hog_info)
+    {
+        // Create reference input with the image's shape
+        SimpleTensor<T> src{ library->get_image_shape(image), data_type_from_format(format) };
+
+        // Fill reference from the same image as the target
+        fill(src, image, format);
+
+        return reference::hog_descriptor<U>(src, border_mode, constant_border_value, hog_info);
+    }
+
+    TensorType      _target{}; // result of compute_target(), assigned in setup()
+    SimpleTensor<U> _reference{}; // result of compute_reference(), assigned in setup()
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_HOG_DESCRIPTOR_FIXTURE */
diff --git a/tests/validation/fixtures/HarrisCornersFixture.h b/tests/validation/fixtures/HarrisCornersFixture.h
index d78845b..e3c29ae 100644
--- a/tests/validation/fixtures/HarrisCornersFixture.h
+++ b/tests/validation/fixtures/HarrisCornersFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,19 +47,19 @@
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, int gradient_size, int block_size, BorderMode border_mode, bool use_fp16, Format format)
+    void setup(std::string image, int gradient_size, int block_size, BorderMode border_mode, bool use_fp16, Format format)
     {
         HarrisCornersParameters params = harris_corners_parameters();
 
-        _target = compute_target(shape, gradient_size, block_size, border_mode, use_fp16, format, params);
-        _reference = compute_reference(shape, gradient_size, block_size, border_mode, format, params);
+        _target = compute_target(image, gradient_size, block_size, border_mode, use_fp16, format, params);
+        _reference = compute_reference(image, gradient_size, block_size, border_mode, format, params);
     }
 
 protected:
     template <typename U>
-    void fill(U &&tensor)
+    void fill(U &&tensor, RawTensor raw)
     {
-        library->fill_tensor_uniform(tensor, 0);
+        library->fill(tensor, raw);
     }
 
     template <typename F, typename std::enable_if<std::is_same<F, NEHarrisCorners>::value, int>::type = 0>
@@ -76,14 +76,16 @@
         func.configure(&src, params.threshold, params.min_dist, params.sensitivity, gradient_size, block_size, &corners, border_mode, params.constant_border_value);
     }
 
-    ArrayType compute_target(const TensorShape &shape, int gradient_size, int block_size, BorderMode border_mode, bool use_fp16, Format format, const HarrisCornersParameters &params)
+    ArrayType compute_target(std::string image, int gradient_size, int block_size, BorderMode border_mode, bool use_fp16, Format format, const HarrisCornersParameters &params)
     {
+        // Load the image (cached by the library if loaded before)
+        const RawTensor &raw = library->get(image, format);
+
         // Create tensors
-        TensorType src = create_tensor<TensorType>(shape, data_type_from_format(format));
-        src.info()->set_format(format);
+        TensorType src = create_tensor<TensorType>(raw.shape(), format);
 
         // Create array of keypoints
-        ArrayType corners(shape.total_size());
+        ArrayType corners(raw.shape().total_size());
 
         // Create harris corners configure function
         FunctionType harris_corners;
@@ -97,7 +99,7 @@
         ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
 
         // Fill tensors
-        fill(AccessorType(src));
+        fill(AccessorType(src), raw);
 
         // Compute function
         harris_corners.run();
@@ -105,13 +107,15 @@
         return corners;
     }
 
-    std::vector<KeyPoint> compute_reference(const TensorShape &shape, int gradient_size, int block_size, BorderMode border_mode, Format format, const HarrisCornersParameters &params)
+    std::vector<KeyPoint> compute_reference(std::string image, int gradient_size, int block_size, BorderMode border_mode, Format format, const HarrisCornersParameters &params)
     {
+        // Load the image (cached by the library if loaded before)
+        const RawTensor &raw = library->get(image, format);
         // Create reference
-        SimpleTensor<T> src{ shape, format };
+        SimpleTensor<T> src{ raw.shape(), format };
 
         // Fill reference
-        fill(src);
+        fill(src, raw);
 
         return reference::harris_corner_detector<T>(src, params.threshold, params.min_dist, params.sensitivity, gradient_size, block_size, border_mode, params.constant_border_value);
     }
diff --git a/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h b/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h
new file mode 100644
index 0000000..ae5c53a
--- /dev/null
+++ b/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_NORMALIZE_PLANAR_YUV_LAYER_FIXTURE
+#define ARM_COMPUTE_TEST_NORMALIZE_PLANAR_YUV_LAYER_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/NormalizePlanarYUVLayer.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class NormalizePlanarYUVLayerValidationFixedPointFixture : public framework::Fixture // Runs FunctionType on (src, mean, sd) and keeps its output next to reference::normalize_planar_yuv_layer
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape0, TensorShape shape1, DataType dt) // shape0: src/dst shape; shape1: mean/sd shape; no fixed point position is taken despite the class name
+    {
+        _data_type = dt;
+        _target    = compute_target(shape0, shape1, dt);
+        _reference = compute_reference(shape0, shape1, dt);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&src_tensor, U &&mean_tensor, U &&sd_tensor) // fills the three tensors; NOTE(review): does nothing when _data_type is not a float type
+    {
+        if(is_data_type_float(_data_type))
+        {
+            float min_bound = 0.f;
+            float max_bound = 0.f;
+            std::tie(min_bound, max_bound) = get_normalize_planar_yuv_layer_test_bounds<T>(); // bounds come from a helper defined elsewhere
+            std::uniform_real_distribution<> distribution(min_bound, max_bound);
+            std::uniform_real_distribution<> distribution_sd(0, max_bound); // sd values are drawn from [0, max_bound), i.e. never negative
+            library->fill(src_tensor, distribution, 0); // third argument differs per tensor (presumably a seed offset - confirm)
+            library->fill(mean_tensor, distribution, 1);
+            library->fill(sd_tensor, distribution_sd, 2);
+        }
+    }
+
+    TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, DataType dt) // returns dst after the function has run
+    {
+        // Create tensors: src and dst share shape0, mean and sd share shape1
+        TensorType src  = create_tensor<TensorType>(shape0, dt, 1);
+        TensorType dst  = create_tensor<TensorType>(shape0, dt, 1);
+        TensorType mean = create_tensor<TensorType>(shape1, dt, 1);
+        TensorType sd   = create_tensor<TensorType>(shape1, dt, 1);
+
+        // Create and configure function
+        FunctionType norm;
+        norm.configure(&src, &dst, &mean, &sd);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); // all tensors are expected to be resizable until they are allocated
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(mean.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(sd.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+        mean.allocator()->allocate();
+        sd.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); // after allocation they are expected to be non-resizable
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!mean.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!sd.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill the three inputs (dst is not filled)
+        fill(AccessorType(src), AccessorType(mean), AccessorType(sd));
+
+        // Compute function
+        norm.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, DataType dt)
+    {
+        // Create reference inputs with the same shapes as the target tensors
+        SimpleTensor<T> ref_src{ shape0, dt, 1 };
+        SimpleTensor<T> ref_mean{ shape1, dt, 1 };
+        SimpleTensor<T> ref_sd{ shape1, dt, 1 };
+
+        // Fill reference through the same fill() helper as the target
+        fill(ref_src, ref_mean, ref_sd);
+
+        return reference::normalize_planar_yuv_layer(ref_src, ref_mean, ref_sd);
+    }
+
+    TensorType      _target{}; // result of compute_target(), assigned in setup()
+    SimpleTensor<T> _reference{}; // result of compute_reference(), assigned in setup()
+    DataType        _data_type{}; // copy of setup()'s dt argument; read by fill()
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class NormalizePlanarYUVLayerValidationFixture : public NormalizePlanarYUVLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T> // Thin wrapper over the fixed point fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape0, TensorShape shape1, DataType dt)
+    {
+        NormalizePlanarYUVLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(shape0, shape1, dt); // forwards all three arguments unchanged
+    }
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_NORMALIZE_PLANAR_YUV_LAYER_FIXTURE */
diff --git a/tests/validation/reference/Convolution.cpp b/tests/validation/reference/Convolution.cpp
new file mode 100644
index 0000000..777e2df
--- /dev/null
+++ b/tests/validation/reference/Convolution.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Helpers.h"
+
+#include "Convolution.h"
+#include "Utils.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T> // Reference convolution: filters every element of src, then divides by scale and saturates the result to [0, 255]
+SimpleTensor<T> convolution(const SimpleTensor<T> &src, const int16_t *conv, uint32_t scale, BorderMode border_mode, T constant_border_value, const unsigned int width, const unsigned int height)
+{
+    SimpleTensor<T>       dst(src.shape(), src.data_type());
+    SimpleTensor<int32_t> sum(src.shape(), src.data_type()); // int32_t intermediate written by the filter; constructed with src's data type
+
+    for(int element_idx = 0; element_idx < src.num_elements(); ++element_idx)
+    {
+        const Coordinates id = index2coord(src.shape(), element_idx);
+        apply_2d_spatial_filter(id, src, sum, TensorShape(width, height), conv, 1, border_mode, constant_border_value); // literal 1 is passed here (presumably the filter's own scale - confirm); division by scale happens below
+
+        if(tensor_elem_at<int32_t>(sum, id, border_mode, constant_border_value) < 0) // negative sums saturate to 0 before any division
+        {
+            dst[element_idx] = 0;
+        }
+        else if((tensor_elem_at<int32_t>(sum, id, border_mode, constant_border_value) / scale) > 255) // NOTE(review): scale == 0 is not guarded in this function
+        {
+            dst[element_idx] = 255;
+        }
+        else
+        {
+            dst[element_idx] = tensor_elem_at<int32_t>(sum, id, border_mode, constant_border_value) / scale; // in-range quotient is stored as is
+        }
+    }
+
+    return dst;
+}
+
+template SimpleTensor<uint8_t> convolution(const SimpleTensor<uint8_t> &src, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value,
+                                           const unsigned int width, const unsigned int height); // explicit instantiation for uint8_t
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/Convolution.h b/tests/validation/reference/Convolution.h
new file mode 100644
index 0000000..ea9f4e4
--- /dev/null
+++ b/tests/validation/reference/Convolution.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_CONVOLUTION_H__
+#define __ARM_COMPUTE_TEST_CONVOLUTION_H__
+
+#include "tests/SimpleTensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T> // Declaration of the reference convolution; width and height give the size of the conv matrix (presumably row-major coefficients - confirm)
+SimpleTensor<T> convolution(const SimpleTensor<T> &src, const int16_t *conv, uint32_t scale, BorderMode border_mode, T constant_border_value, const unsigned int width, const unsigned int height);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_CONVOLUTION_H__ */
diff --git a/tests/validation/reference/ConvolutionLayer.cpp b/tests/validation/reference/ConvolutionLayer.cpp
index 1066411..567fac0 100644
--- a/tests/validation/reference/ConvolutionLayer.cpp
+++ b/tests/validation/reference/ConvolutionLayer.cpp
@@ -210,7 +210,7 @@
 
     acc = asymm_rounding_divide_by_pow2(asymm_int_mult(acc, output_multiplier), output_shift);
     acc += output_offset;
-    acc = clamp<int32_t>(acc, 0, 255);
+    acc = utility::clamp<int32_t>(acc, 0, 255);
 
     // Store the result
     *out_ptr = acc;
diff --git a/tests/validation/reference/DeconvolutionLayer.cpp b/tests/validation/reference/DeconvolutionLayer.cpp
index 82c2188..0cf1087 100644
--- a/tests/validation/reference/DeconvolutionLayer.cpp
+++ b/tests/validation/reference/DeconvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -39,26 +39,27 @@
                                     const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> &a)
 {
     // Create reference
+    const int   stride_x     = info.stride().first;
+    const int   stride_y     = info.stride().second;
     TensorShape scaled_shape = src.shape();
-    scaled_shape.set(0, output_shape.x());
-    scaled_shape.set(1, output_shape.y());
+    int         out_x        = src.shape().x() + (src.shape().x() - 1) * (stride_x - 1) + a.first + 2 * info.pad().first;
+    int         out_y        = src.shape().y() + (src.shape().y() - 1) * (stride_y - 1) + a.second + 2 * info.pad().second;
+    scaled_shape.set(0, out_x);
+    scaled_shape.set(1, out_y);
     SimpleTensor<T> scaled{ scaled_shape, src.data_type(), 1, src.fixed_point_position() };
 
-    const int          width_in      = src.shape().x();
-    const int          height_in     = src.shape().y();
-    const int          width_scaled  = scaled.shape().x();
-    const int          height_scaled = scaled.shape().y();
-    const int          num_2d_slices = src.shape().total_size() / (width_in * height_in);
-    const float        width_ratio   = static_cast<float>(width_in) / static_cast<float>(width_scaled);
-    const float        height_ratio  = static_cast<float>(height_in) / static_cast<float>(height_scaled);
-    const int          ax            = a.first;  // The number of zeros added to right edge of the input.
-    const int          ay            = a.second; // The number of zeros added to bottom edge of the input.
-    const unsigned int kernel_size   = weights.shape().x();
-    ARM_COMPUTE_ERROR_ON(info.pad().first > (kernel_size - 1));
-    const int transposed_convolution_padx = kernel_size - info.pad().first - 1;
-    const int transposed_convolution_pady = kernel_size - info.pad().second - 1;
-    const int stridex                     = info.stride().first;
-    const int stridey                     = info.stride().second;
+    const int width_in      = src.shape().x();
+    const int height_in     = src.shape().y();
+    const int width_scaled  = scaled.shape().x();
+    const int height_scaled = scaled.shape().y();
+    const int num_2d_slices = src.shape().total_size() / (width_in * height_in);
+    const int ax            = a.first;  // The number of zeros added to right edge of the input.
+    const int ay            = a.second; // The number of zeros added to top edge of the input.
+    ARM_COMPUTE_ERROR_ON(info.pad().first > (weights.shape().x() - 1));
+
+    ARM_COMPUTE_ERROR_ON_MSG(ax > stride_x - 1, "ax must be smaller than stride_x");
+    ARM_COMPUTE_ERROR_ON_MSG(ay > stride_y - 1, "ay must be smaller than stride_y");
+
     for(int j = 0; j < scaled.num_elements(); ++j)
     {
         scaled[j] = T(0);
@@ -68,34 +69,23 @@
     {
         const int offset_slice_in  = slice * width_in * height_in;
         const int offset_slice_out = slice * width_scaled * height_scaled;
-        for(int yi = ay; yi < height_scaled; yi += stridey)
+        const int start_x          = info.pad().first;
+        const int start_y          = ay + info.pad().second;
+        const int end_y            = height_scaled - info.pad().second;
+        const int end_x            = width_scaled - ax - info.pad().first;
+
+        for(int yi = start_y, in_y = 0; yi < end_y; yi += stride_y, in_y++)
         {
-            for(int xi = transposed_convolution_padx; xi < width_scaled; xi += stridex)
+            for(int xi = start_x, in_x = 0; xi < end_x; xi += stride_x, in_x++)
             {
-                const float x_src     = (xi + 0.5f) * width_ratio - 0.5f;
-                const float y_src     = (yi + 0.5f) * height_ratio - 0.5f;
-                T          *out       = scaled.data() + offset_slice_out + xi + yi * width_scaled;
-                const bool  in_bounds = x_src > -1 && y_src > -1 && x_src < width_in && y_src < height_in;
-                const bool  in_axy    = xi < transposed_convolution_padx || xi >= (width_scaled - ax)  // this is checking if the x coordinate is in the padded left/right area
-                                        || yi < ay || yi >= (height_scaled - transposed_convolution_pady); // like above but top and bottom padding in the upscaled XY plane
-                if(!in_axy)
-                {
-                    if(in_bounds)
-                    {
-                        const int in_scaled_x = (x_src < 0.f) ? static_cast<int>(x_src - 0.5f) : static_cast<int>(x_src + 0.5f);
-                        const int in_scaled_y = (y_src < 0.f) ? static_cast<int>(y_src - 0.5f) : static_cast<int>(y_src + 0.5f);
-                        const T *in          = src.data() + offset_slice_in + in_scaled_x + in_scaled_y * width_in;
-                        *out                  = *in;
-                    }
-                    else
-                    {
-                        *out = T(0);
-                    }
-                }
+                const T *in  = src.data() + offset_slice_in + in_y * width_in + in_x;
+                T       *out = scaled.data() + offset_slice_out + xi + yi * width_scaled;
+                *out         = *in;
             }
         }
     }
-    const PadStrideInfo conv_info(1, 1, 1, 1, DimensionRoundingType::CEIL);
+
+    const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
     return convolution_layer(scaled, weights, bias, output_shape, conv_info);
 }
 
diff --git a/tests/validation/reference/DeconvolutionLayer.h b/tests/validation/reference/DeconvolutionLayer.h
index 8222e32..c0bc1fa 100644
--- a/tests/validation/reference/DeconvolutionLayer.h
+++ b/tests/validation/reference/DeconvolutionLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,7 +42,7 @@
  * bias             Optional, ignored if NULL. The biases have one dimension. Data type supported: Same as @p input.
  * output_shape     Output tensor shape. The output has the same number of dimensions as the @p input.
  * info             Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
- * a                The number of zeros added to right edge of the input.
+ * a                The number of zeros added to right and top edges of the input.
  *
  */
 template <typename T>
diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.cpp b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
index 0e88d3d..6ca347f 100644
--- a/tests/validation/reference/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/reference/DepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -89,14 +89,15 @@
                     Coordinates coords(static_cast<int>(x), static_cast<int>(y), static_cast<int>(z), static_cast<int>(r));
                     size_t      filter_offset = filter_plane * z;
 
-                    T val = 0;
+                    T val(0);
                     for(int j = y - filter_half_height; j <= static_cast<int>(y + filter_half_height); ++j)
                     {
                         for(int i = x - filter_half_width; i <= static_cast<int>(x + filter_half_width); ++i)
                         {
                             coords.set(0, i);
                             coords.set(1, j);
-                            val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, 0.f);
+                            T border_value(0);
+                            val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, border_value);
                             ++filter_offset;
                         }
                     }
@@ -155,17 +156,17 @@
             {
                 for(int x = minimum_x; x < input_width + pad_x - filter_half_size; x += conv_info.stride().first)
                 {
-                    Coordinates coords(x, y, z);
+                    Coordinates coords(x, y, z, r);
                     int         filter_offset = filter_plane * z;
 
-                    uint32_t val = 0;
+                    int32_t val = 0;
                     for(int j = y - filter_half_size; j <= (y + filter_half_size); ++j)
                     {
                         for(int i = x - filter_half_size; i <= (x + filter_half_size); ++i)
                         {
                             coords.set(0, i);
                             coords.set(1, j);
-                            auto    in_val = tensor_elem_at<uint8_t>(src, coords, BorderMode::CONSTANT, 0);
+                            auto    in_val = tensor_elem_at<uint8_t>(src, coords, BorderMode::CONSTANT, -input_offset);
                             uint8_t w_val  = *(weights.data() + filter_offset);
                             val += (in_val + input_offset) * (w_val + weights_offset);
                             ++filter_offset;
@@ -189,6 +190,9 @@
 
 template SimpleTensor<float> depthwise_convolution(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &biases, const TensorShape &dst_shape,
                                                    const PadStrideInfo &conv_info);
+
+template SimpleTensor<half> depthwise_convolution(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &biases, const TensorShape &dst_shape,
+                                                  const PadStrideInfo &conv_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/Derivative.cpp b/tests/validation/reference/Derivative.cpp
new file mode 100644
index 0000000..0ef8fc2
--- /dev/null
+++ b/tests/validation/reference/Derivative.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Derivative.h"
+
+#include "Utils.h"
+#include "tests/Types.h"
+
+#include <array>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+const std::array<int8_t, 9> derivative_3_x{ { 0, 0, 0, -1, 0, 1, 0, 0, 0 } }; // 3x3 coefficients used for the X gradient
+const std::array<int8_t, 9> derivative_3_y{ { 0, -1, 0, 0, 0, 0, 0, 1, 0 } }; // 3x3 coefficients used for the Y gradient
+// Compile-time mapping from an output element type to its DataType; the primary template is only declared.
+template <typename T>
+struct data_type;
+// int16_t outputs are created as DataType::S16 tensors.
+template <>
+struct data_type<int16_t>
+{
+    const static DataType value = DataType::S16;
+};
+} // namespace
+
+/** Reference 3x3 derivative: filters @p src with the X and/or Y coefficients selected by @p gradient_dimension and returns the pair (dst_x, dst_y). */
+template <typename T, typename U>
+std::pair<SimpleTensor<T>, SimpleTensor<T>> derivative(const SimpleTensor<U> &src, BorderMode border_mode, uint8_t constant_border_value, GradientDimension gradient_dimension)
+{
+    const unsigned int filter_size = 3;
+    const TensorShape  filter_shape{ filter_size, filter_size };
+    const bool         want_x = (gradient_dimension == GradientDimension::GRAD_X) || (gradient_dimension == GradientDimension::GRAD_XY);
+    const bool         want_y = (gradient_dimension == GradientDimension::GRAD_Y) || (gradient_dimension == GradientDimension::GRAD_XY);
+
+    // Both outputs take the shape and channel count of the source
+    SimpleTensor<T> dst_x(src.shape(), data_type<T>::value, src.num_channels());
+    SimpleTensor<T> dst_y(src.shape(), data_type<T>::value, src.num_channels());
+
+    // Region that receives results, derived from the border mode and a border of filter_size / 2
+    ValidRegion region = shape_to_valid_region(src.shape(), border_mode == BorderMode::UNDEFINED, BorderSize(filter_size / 2));
+
+    for(int idx = 0; idx < src.num_elements(); ++idx)
+    {
+        Coordinates id = index2coord(src.shape(), idx);
+
+        // Elements outside the region are skipped and never written
+        if(is_in_valid_region(region, id))
+        {
+            // An unsupported dimension is only reported once an element is actually processed
+            if(!want_x && !want_y)
+            {
+                ARM_COMPUTE_ERROR("Gradient dimension not supported");
+            }
+
+            if(want_x)
+            {
+                apply_2d_spatial_filter(id, src, dst_x, filter_shape, derivative_3_x.data(), 1.f, border_mode, constant_border_value);
+            }
+            if(want_y)
+            {
+                apply_2d_spatial_filter(id, src, dst_y, filter_shape, derivative_3_y.data(), 1.f, border_mode, constant_border_value);
+            }
+        }
+    }
+
+    return std::make_pair(dst_x, dst_y);
+}
+// Explicit instantiation: uint8_t source, int16_t gradients.
+template std::pair<SimpleTensor<int16_t>, SimpleTensor<int16_t>> derivative(const SimpleTensor<uint8_t> &src, BorderMode border_mode, uint8_t constant_border_value,
+                                                                            GradientDimension gradient_dimension);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/Derivative.h b/tests/validation/reference/Derivative.h
new file mode 100644
index 0000000..27664a7
--- /dev/null
+++ b/tests/validation/reference/Derivative.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_DERIVATIVE_H__
+#define __ARM_COMPUTE_TEST_DERIVATIVE_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/Types.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T, typename U> // T: element type of the gradient tensors, U: element type of the source
+std::pair<SimpleTensor<T>, SimpleTensor<T>> derivative(const SimpleTensor<U> &src, BorderMode border_mode, uint8_t constant_border_value, GradientDimension gradient_dimension); // Returns (X gradient, Y gradient)
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_DERIVATIVE_H__ */
diff --git a/tests/validation/reference/FullyConnectedLayer.cpp b/tests/validation/reference/FullyConnectedLayer.cpp
index c24881e..5384715 100644
--- a/tests/validation/reference/FullyConnectedLayer.cpp
+++ b/tests/validation/reference/FullyConnectedLayer.cpp
@@ -138,7 +138,7 @@
 
         acc = asymm_rounding_divide_by_pow2(asymm_int_mult(acc, output_multiplier), output_shift);
         acc += output_offset;
-        acc = clamp<int32_t>(acc, 0, 255);
+        acc = utility::clamp<int32_t>(acc, 0, 255);
 
         // Store the result
         dst_ptr[y] = static_cast<uint8_t>(acc);
diff --git a/tests/validation/reference/GEMMTranspose1xW.h b/tests/validation/reference/GEMMTranspose1xW.h
new file mode 100644
index 0000000..d6a2e89
--- /dev/null
+++ b/tests/validation/reference/GEMMTranspose1xW.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GEMM.h"
+
+#include "arm_compute/core/Types.h"
+#include "tests/validation/FixedPoint.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+/** Reference for the GEMM 1xW transposition: every input row is cut into chunks of W = 16 / sizeof(T) elements and
+ *  chunk c of row y is stored in output row c at column offset y * W; chunk elements past the input width are zero. */
+template <typename T>
+SimpleTensor<T> gemm_transpose_1xW(const SimpleTensor<T> &in)
+{
+    const int     W        = 16 / sizeof(T);
+    const int32_t src_rows = in.shape().y();
+    const int32_t src_cols = in.shape().x();
+
+    // Output shape: (rows * W) x ceil(cols / W)
+    const TensorShape shape_out(static_cast<size_t>(in.shape().y() * W), static_cast<size_t>(std::ceil(in.shape().x() / static_cast<float>(W))));
+    SimpleTensor<T>   out(shape_out, in.data_type());
+    const int32_t     dst_stride = out.shape().x();
+    const T          *src_base   = reinterpret_cast<const T *>(in.data());
+    T                *dst_base   = reinterpret_cast<T *>(out.data());
+
+    for(int chunk_start = 0; chunk_start < src_cols; chunk_start += W)
+    {
+        const int chunk = chunk_start / W;
+        for(int row = 0; row < src_rows; ++row)
+        {
+            const T *src_chunk = src_base + chunk_start + row * src_cols;
+            T       *dst_chunk = dst_base + row * W + chunk * dst_stride;
+
+            for(int k = 0; k < W; ++k)
+            {
+                // Zero padding when the input width is not a multiple of W
+                const bool past_end = (chunk_start + k) >= src_cols;
+                dst_chunk[k]        = past_end ? T(0) : src_chunk[k];
+            }
+        }
+    }
+
+    return out;
+}
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/HOGDescriptor.cpp b/tests/validation/reference/HOGDescriptor.cpp
new file mode 100644
index 0000000..369ac74
--- /dev/null
+++ b/tests/validation/reference/HOGDescriptor.cpp
@@ -0,0 +1,273 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "HOGDescriptor.h"
+
+#include "Derivative.h"
+#include "Magnitude.h"
+#include "Phase.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+namespace
+{
+/** Accumulate the magnitude-weighted orientation votes of one cell into @p bins (indexed up to hog_info.num_bins() - 1). */
+template <typename T>
+void hog_orientation_compute(const SimpleTensor<T> &mag, const SimpleTensor<T> &phase, std::vector<T> &bins, const HOGInfo &hog_info)
+{
+    const size_t num_bins    = hog_info.num_bins();
+    const size_t cell_height = hog_info.cell_size().height;
+    const size_t cell_width  = hog_info.cell_size().width;
+    // Factor that converts a phase sample into a fractional bin index
+    float phase_scale = (PhaseType::SIGNED == hog_info.phase_type() ? num_bins / 360.0f : num_bins / 180.0f);
+    phase_scale *= (PhaseType::SIGNED == hog_info.phase_type() ? 360.0f / 255.0f : 1.0f);
+
+    int row_idx = 0;
+    for(size_t yc = 0; yc < cell_height; ++yc)
+    {
+        // Each row of the cell holds cell_width samples (row_idx advances by cell_width per row)
+        for(size_t xc = 0; xc < cell_width; xc++)
+        {
+            const float mag_value   = mag[(row_idx + xc)];
+            const float phase_value = phase[(row_idx + xc)] * phase_scale + 0.5f;
+            const float w1          = phase_value - floor(phase_value);
+
+            // Histogram index in [0, num_bins - 1]; the modulo wraps an index equal to num_bins back to 0
+            const auto hidx = static_cast<unsigned int>(phase_value) % num_bins;
+
+            // Weighted vote between 2 bins
+            bins[hidx] += mag_value * (1.0f - w1);
+            bins[(hidx + 1) % num_bins] += mag_value * w1;
+        }
+        row_idx += cell_width;
+    }
+}
+
+/** Normalise one block histogram in place and copy it into the descriptor element addressed by @p block_idx. */
+template <typename T>
+void hog_block_normalization_compute(SimpleTensor<T> &block, SimpleTensor<T> &desc, const HOGInfo &hog_info, size_t block_idx)
+{
+    const HOGNormType norm_type          = hog_info.normalization_type();
+    const int         num_bins_per_block = desc.num_channels();
+    const Coordinates id                 = index2coord(desc.shape(), block_idx);
+    const bool        is_l1              = (norm_type == HOGNormType::L1_NORM);
+    // Accumulate |v| for the L1 norm and v * v for every other norm type
+    float accum = 0.0f;
+    for(int bin = 0; bin < num_bins_per_block; ++bin)
+    {
+        const float val = block[bin];
+        accum += is_l1 ? std::fabs(val) : val * val;
+    }
+
+    // Calculate normalization scale
+    float scale = 1.0f / (std::sqrt(accum) + num_bins_per_block * 0.1f);
+
+    if(norm_type == HOGNormType::L2HYS_NORM)
+    {
+        // Second pass: scale, clip at l2_hyst_threshold and re-accumulate the squares
+        accum = 0.0f;
+        for(int bin = 0; bin < num_bins_per_block; ++bin)
+        {
+            const float scaled  = block[bin] * scale;
+            const float clipped = fmin(scaled, hog_info.l2_hyst_threshold());
+            accum += clipped * clipped;
+            block[bin] = clipped;
+        }
+
+        // We use the same constants of OpenCV
+        scale = 1.0f / (std::sqrt(accum) + 1e-3f);
+    }
+
+    // Apply the final scale and store the block in the descriptor
+    float *const out = reinterpret_cast<float *>(desc(id));
+    for(int bin = 0; bin < num_bins_per_block; ++bin)
+    {
+        block[bin] *= scale;
+        out[bin] = block[bin];
+    }
+}
+} // namespace
+
+/** Build the HOG space: for every cell, gather its magnitude/phase samples and store the cell histogram (num_bins values) in @p hog_space. */
+template <typename T, typename U, typename V>
+void hog_orientation_binning(const SimpleTensor<T> &mag, const SimpleTensor<U> &phase, SimpleTensor<V> &hog_space, const HOGInfo &hog_info)
+{
+    const size_t cell_width   = hog_info.cell_size().width;
+    const size_t cell_height  = hog_info.cell_size().height;
+    const size_t shape_width  = hog_space.shape().x() * hog_info.cell_size().width;
+    const size_t shape_height = hog_space.shape().y() * hog_info.cell_size().height;
+    // Row stride of the source planes; it exceeds shape_width when the source width is not a multiple of the cell width
+    const size_t src_stride = mag.shape().x();
+
+    SimpleTensor<V> mag_cell(TensorShape(cell_width, cell_height), DataType::F32);
+    SimpleTensor<V> phase_cell(TensorShape(cell_width, cell_height), DataType::F32);
+
+    int cell_idx = 0;
+    int y_offset = 0;
+    int x_offset = 0;
+
+    // Traverse shape
+    for(auto sy = cell_height - 1; sy < shape_height; sy += cell_height)
+    {
+        x_offset = 0;
+        for(auto sx = cell_width - 1; sx < shape_width; sx += cell_width)
+        {
+            int row_idx  = 0;
+            int elem_idx = 0;
+
+            // Traverse cell
+            for(auto y = 0u; y < cell_height; ++y)
+            {
+                for(auto x = 0u; x < cell_width; ++x)
+                {
+                    int shape_idx        = x + row_idx + x_offset + y_offset;
+                    mag_cell[elem_idx]   = mag[shape_idx];
+                    phase_cell[elem_idx] = phase[shape_idx];
+                    elem_idx++;
+                }
+                row_idx += src_stride;
+            }
+
+            // Partition magnitude values into bins based on phase values
+            std::vector<V> bins(hog_info.num_bins());
+            hog_orientation_compute(mag_cell, phase_cell, bins, hog_info);
+
+            for(size_t i = 0; i < hog_info.num_bins(); ++i)
+            {
+                hog_space[cell_idx * hog_info.num_bins() + i] = bins[i];
+            }
+            x_offset += cell_width;
+            cell_idx++;
+        }
+        y_offset += (cell_height * src_stride);
+    }
+}
+
+/** Gather the cell histograms of every block from @p hog_space, normalise them and write one descriptor entry per block into @p desc. */
+template <typename T>
+void hog_block_normalization(SimpleTensor<T> &desc, const SimpleTensor<T> &hog_space, const HOGInfo &hog_info)
+{
+    const Size2D cells_per_block        = hog_info.num_cells_per_block();
+    const Size2D cells_per_block_stride = hog_info.num_cells_per_block_stride();
+
+    const size_t block_width         = hog_info.block_size().width;
+    const size_t block_height        = hog_info.block_size().height;
+    const size_t block_stride_width  = hog_info.block_stride().width;
+    const size_t block_stride_height = hog_info.block_stride().height;
+    const size_t shape_width         = hog_space.shape().x() * hog_info.cell_size().width;
+    const size_t shape_height        = hog_space.shape().y() * hog_info.cell_size().height;
+
+    const size_t num_bins     = hog_info.num_bins();
+    const size_t num_channels = cells_per_block.area() * num_bins;
+
+    SimpleTensor<T> block(TensorShape{ 1u, 1u }, DataType::F32, num_channels);
+
+    int block_idx      = 0;
+    int block_y_offset = 0;
+
+    // Traverse shape: sy tracks the bottom row of a block (bounded by shape_height) and sx its right-most column
+    // (bounded by shape_width), so sy has to start from block_height and sx from block_width
+    for(auto sy = block_height - 1; sy < shape_height; sy += block_stride_height)
+    {
+        int block_x_offset = 0;
+        for(auto sx = block_width - 1; sx < shape_width; sx += block_stride_width)
+        {
+            int cell_y_offset = 0;
+            int elem_idx      = 0;
+
+            // Traverse block
+            for(auto y = 0u; y < cells_per_block.height; ++y)
+            {
+                int cell_x_offset = 0;
+                for(auto x = 0u; x < cells_per_block.width; ++x)
+                {
+                    for(auto bin = 0u; bin < num_bins; ++bin)
+                    {
+                        int idx         = bin + cell_x_offset + cell_y_offset + block_x_offset + block_y_offset;
+                        block[elem_idx] = hog_space[idx];
+                        elem_idx++;
+                    }
+                    cell_x_offset += num_bins;
+                }
+
+                cell_y_offset += hog_space.shape().x() * num_bins;
+            }
+
+            // Normalize block and write to descriptor
+            hog_block_normalization_compute(block, desc, hog_info, block_idx);
+
+            block_x_offset += cells_per_block_stride.width * num_bins;
+            block_idx++;
+        }
+        block_y_offset += cells_per_block_stride.height * num_bins * hog_space.shape().x();
+    }
+}
+
+/** Reference HOG descriptor of @p src.
+ *
+ * Pipeline: derivative -> magnitude and phase -> per-cell orientation binning -> block normalisation.
+ * The border arguments are forwarded to the derivative; the returned tensor is created as F32.
+ */
+template <typename T, typename U>
+SimpleTensor<T> hog_descriptor(const SimpleTensor<U> &src, BorderMode border_mode, U constant_border_value, const HOGInfo &hog_info)
+{
+    SimpleTensor<int16_t> grad_x;
+    SimpleTensor<int16_t> grad_y;
+
+    // Calculate derivative
+    std::tie(grad_x, grad_y) = derivative<int16_t>(src, border_mode, constant_border_value, GradientDimension::GRAD_XY);
+
+    // Calculate magnitude and phase
+    const SimpleTensor<int16_t> grad_mag   = magnitude(grad_x, grad_y, MagnitudeType::L2NORM);
+    const SimpleTensor<uint8_t> grad_phase = phase(grad_x, grad_y, hog_info.phase_type());
+
+    // HOG space: (num_cells_x, num_cells_y) cells, each holding a histogram of num_bins values
+    const TensorShape cells_shape(src.shape().x() / hog_info.cell_size().width,
+                                  src.shape().y() / hog_info.cell_size().height);
+    const TensorInfo  cells_info(cells_shape, hog_info.num_bins(), DataType::F32);
+    SimpleTensor<T>   hog_space(cells_info.tensor_shape(), DataType::F32, cells_info.num_channels());
+
+    // Descriptor tensor, shaped by the TensorInfo built from the HOG parameters and the source size
+    const TensorInfo descriptor_info(hog_info, src.shape().x(), src.shape().y());
+    SimpleTensor<T>  desc(descriptor_info.tensor_shape(), DataType::F32, descriptor_info.num_channels());
+
+    // For each cell create histogram based on magnitude and phase
+    hog_orientation_binning(grad_mag, grad_phase, hog_space, hog_info);
+
+    // Normalize histograms based on block size
+    hog_block_normalization(desc, hog_space, hog_info);
+
+    return desc;
+}
+// Explicit instantiation: uint8_t source, float descriptor.
+template SimpleTensor<float> hog_descriptor(const SimpleTensor<uint8_t> &src, BorderMode border_mode, uint8_t constant_border_value, const HOGInfo &hog_info);
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/HOGDescriptor.h b/tests/validation/reference/HOGDescriptor.h
new file mode 100644
index 0000000..e886445
--- /dev/null
+++ b/tests/validation/reference/HOGDescriptor.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_HOG_DESCRIPTOR_H__
+#define __ARM_COMPUTE_TEST_HOG_DESCRIPTOR_H__
+
+#include "tests/SimpleTensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T, typename U> // T: element type of the descriptor, U: element type of the source
+SimpleTensor<T> hog_descriptor(const SimpleTensor<U> &src, BorderMode border_mode, U constant_border_value, const HOGInfo &hog_info); // Reference HOG descriptor of src
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_HOG_DESCRIPTOR_H__ */
diff --git a/tests/validation/reference/NormalizePlanarYUVLayer.cpp b/tests/validation/reference/NormalizePlanarYUVLayer.cpp
new file mode 100644
index 0000000..2442943
--- /dev/null
+++ b/tests/validation/reference/NormalizePlanarYUVLayer.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "NormalizePlanarYUVLayer.h"
+
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+/** Reference NormalizePlanarYUV layer for floating point types: result = (src - mean[c]) / sd[c], with c the index along dimension 2. */
+template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type *>
+SimpleTensor<T> normalize_planar_yuv_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &mean, const SimpleTensor<T> &sd)
+{
+    const int cols       = static_cast<int>(src.shape()[0]);
+    const int rows       = static_cast<int>(src.shape()[1]);
+    const int depth      = static_cast<int>(src.shape()[2]);
+    const int upper_dims = src.shape().total_size() / (cols * rows * depth);
+    const int plane_size = rows * cols;
+
+    SimpleTensor<T> result(src.shape(), src.data_type());
+
+    for(int batch = 0; batch < upper_dims; ++batch)
+    {
+        for(int channel = 0; channel < depth; ++channel)
+        {
+            // All elements of one plane share the same mean / sd entry
+            const int plane_start = channel * plane_size + batch * plane_size * depth;
+            for(int offset = 0; offset < plane_size; ++offset)
+            {
+                const int pos = plane_start + offset;
+                result[pos]   = (src[pos] - mean[channel]) / sd[channel];
+            }
+        }
+    }
+    return result;
+}
+// Explicit instantiation for half precision tensors.
+template SimpleTensor<half> normalize_planar_yuv_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &mean, const SimpleTensor<half> &sd);
+
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/reference/NormalizePlanarYUVLayer.h b/tests/validation/reference/NormalizePlanarYUVLayer.h
new file mode 100644
index 0000000..c8740a3
--- /dev/null
+++ b/tests/validation/reference/NormalizePlanarYUVLayer.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_NORMALIZE_PLANAR_YUV_LAYER_H__
+#define __ARM_COMPUTE_TEST_NORMALIZE_PLANAR_YUV_LAYER_H__
+
+#include "tests/SimpleTensor.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace reference
+{
+template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type * = nullptr> // Enabled only for T where is_floating_point<T>::value is true
+SimpleTensor<T> normalize_planar_yuv_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &mean, const SimpleTensor<T> &sd); // Reference NormalizePlanarYUV: takes the source tensor plus mean and sd tensors, returns a new tensor
+} // namespace reference
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_TEST_NORMALIZE_PLANAR_YUV_LAYER_H__ */
diff --git a/tests/validation/reference/Scale.cpp b/tests/validation/reference/Scale.cpp
index 727325f..0cc96ab 100644
--- a/tests/validation/reference/Scale.cpp
+++ b/tests/validation/reference/Scale.cpp
@@ -22,10 +22,9 @@
  * SOFTWARE.
  */
 
-#include "arm_compute/core/Helpers.h"
-
 #include "Scale.h"
 #include "Utils.h"
+#include "arm_compute/core/utils/misc/utility.h"
 #include "support/ToolchainSupport.h"
 
 namespace arm_compute
@@ -119,8 +118,8 @@
                     }
                     else if(border_mode == BorderMode::REPLICATE)
                     {
-                        id.set(0, clamp(static_cast<int>(x_src), 0, width - 1));
-                        id.set(1, clamp(static_cast<int>(y_src), 0, height - 1));
+                        id.set(0, utility::clamp<int>(x_src, 0, width - 1));
+                        id.set(1, utility::clamp<int>(y_src, 0, height - 1));
                         out[element_idx] = in[coord2index(in.shape(), id)];
                     }
                 }