arm_compute v18.05
diff --git a/utils/GraphTypePrinter.h b/utils/GraphTypePrinter.h
deleted file mode 100644
index 4ff1019..0000000
--- a/utils/GraphTypePrinter.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_TEST_GRAPH_TYPE_PRINTER_H__
-#define __ARM_COMPUTE_TEST_GRAPH_TYPE_PRINTER_H__
-
-#include "arm_compute/graph/Types.h"
-
-#include <ostream>
-#include <sstream>
-#include <string>
-
-namespace arm_compute
-{
-namespace graph
-{
-/** Formatted output of the @ref ConvolutionMethodHint type. */
-inline ::std::ostream &operator<<(::std::ostream &os, const ConvolutionMethodHint &conv_method)
-{
-    switch(conv_method)
-    {
-        case ConvolutionMethodHint::DIRECT:
-            os << "DIRECT";
-            break;
-        case ConvolutionMethodHint::GEMM:
-            os << "GEMM";
-            break;
-        default:
-            ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
-    }
-
-    return os;
-}
-
-inline std::string to_string(const ConvolutionMethodHint &conv_method)
-{
-    std::stringstream str;
-    str << conv_method;
-    return str.str();
-}
-
-/** Formatted output of the @ref TargetHint type. */
-inline ::std::ostream &operator<<(::std::ostream &os, const TargetHint &target_hint)
-{
-    switch(target_hint)
-    {
-        case TargetHint::NEON:
-            os << "NEON";
-            break;
-        case TargetHint::OPENCL:
-            os << "OPENCL";
-            break;
-        default:
-            ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
-    }
-
-    return os;
-}
-
-inline std::string to_string(const TargetHint &target_hint)
-{
-    std::stringstream str;
-    str << target_hint;
-    return str.str();
-}
-} // namespace graph
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_TEST_GRAPH_TYPE_PRINTER_H__ */
diff --git a/utils/GraphUtils.cpp b/utils/GraphUtils.cpp
index 448343a..0edb6f2 100644
--- a/utils/GraphUtils.cpp
+++ b/utils/GraphUtils.cpp
@@ -24,18 +24,36 @@
 
 #include "utils/GraphUtils.h"
 
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/SubTensor.h"
 #include "utils/Utils.h"
 
-#ifdef ARM_COMPUTE_CL
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#endif /* ARM_COMPUTE_CL */
-
 #include <iomanip>
 
 using namespace arm_compute::graph_utils;
 
+namespace
+{
+std::pair<arm_compute::TensorShape, arm_compute::PermutationVector> compute_permutation_paramaters(const arm_compute::TensorShape &shape,
+                                                                                                   arm_compute::DataLayout data_layout)
+{
+    // Set permutation parameters if needed
+    arm_compute::TensorShape       permuted_shape = shape;
+    arm_compute::PermutationVector perm;
+    // Permute only if num_dimensions greater than 2
+    if(shape.num_dimensions() > 2)
+    {
+        perm = (data_layout == arm_compute::DataLayout::NHWC) ? arm_compute::PermutationVector(2U, 0U, 1U) : arm_compute::PermutationVector(1U, 2U, 0U);
+
+        arm_compute::PermutationVector perm_shape = (data_layout == arm_compute::DataLayout::NCHW) ? arm_compute::PermutationVector(2U, 0U, 1U) : arm_compute::PermutationVector(1U, 2U, 0U);
+        arm_compute::permute(permuted_shape, perm_shape);
+    }
+
+    return std::make_pair(permuted_shape, perm);
+}
+} // namespace
+
 void TFPreproccessor::preprocess(ITensor &tensor)
 {
     Window window;
@@ -111,6 +129,45 @@
     return ret;
 }
 
+NumPyAccessor::NumPyAccessor(std::string npy_path, TensorShape shape, DataType data_type, std::ostream &output_stream)
+    : _npy_tensor(), _filename(std::move(npy_path)), _output_stream(output_stream)
+{
+    NumPyBinLoader loader(_filename);
+
+    TensorInfo info(shape, 1, data_type);
+    _npy_tensor.allocator()->init(info);
+    _npy_tensor.allocator()->allocate();
+
+    loader.access_tensor(_npy_tensor);
+}
+
+template <typename T>
+void NumPyAccessor::access_numpy_tensor(ITensor &tensor)
+{
+    const int num_elements          = tensor.info()->total_size();
+    int       num_mismatches        = utils::compare_tensor<T>(tensor, _npy_tensor);
+    float     percentage_mismatches = static_cast<float>(num_mismatches) / num_elements;
+
+    _output_stream << "Results: " << 100.f - (percentage_mismatches * 100) << " % matches with the provided output[" << _filename << "]." << std::endl;
+}
+
+bool NumPyAccessor::access_tensor(ITensor &tensor)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&tensor, 1, DataType::F32);
+    ARM_COMPUTE_ERROR_ON(_npy_tensor.info()->dimension(0) != tensor.info()->dimension(0));
+
+    switch(tensor.info()->data_type())
+    {
+        case DataType::F32:
+            access_numpy_tensor<float>(tensor);
+            break;
+        default:
+            ARM_COMPUTE_ERROR("NOT SUPPORTED!");
+    }
+
+    return false;
+}
+
 PPMAccessor::PPMAccessor(std::string ppm_path, bool bgr, std::unique_ptr<IPreprocessor> preprocessor)
     : _ppm_path(std::move(ppm_path)), _bgr(bgr), _preprocessor(std::move(preprocessor))
 {
@@ -123,8 +180,15 @@
     // Open PPM file
     ppm.open(_ppm_path);
 
-    ARM_COMPUTE_ERROR_ON_MSG(ppm.width() != tensor.info()->dimension(0) || ppm.height() != tensor.info()->dimension(1),
-                             "Failed to load image file: dimensions [%d,%d] not correct, expected [%d,%d].", ppm.width(), ppm.height(), tensor.info()->dimension(0), tensor.info()->dimension(1));
+    // Get permuted shape and permutation parameters
+    TensorShape                    permuted_shape = tensor.info()->tensor_shape();
+    arm_compute::PermutationVector perm;
+    if(tensor.info()->data_layout() != DataLayout::NCHW)
+    {
+        std::tie(permuted_shape, perm) = compute_permutation_paramaters(tensor.info()->tensor_shape(), tensor.info()->data_layout());
+    }
+    ARM_COMPUTE_ERROR_ON_MSG(ppm.width() != permuted_shape.x() || ppm.height() != permuted_shape.y(),
+                             "Failed to load image file: dimensions [%d,%d] not correct, expected [%d,%d].", ppm.width(), ppm.height(), permuted_shape.x(), permuted_shape.y());
 
     // Fill the tensor with the PPM content (BGR)
     ppm.fill_planar_tensor(tensor, _bgr);
@@ -325,8 +389,8 @@
     return true;
 }
 
-NumPyBinLoader::NumPyBinLoader(std::string filename)
-    : _filename(std::move(filename))
+NumPyBinLoader::NumPyBinLoader(std::string filename, DataLayout file_layout)
+    : _filename(std::move(filename)), _file_layout(file_layout)
 {
 }
 
@@ -371,30 +435,57 @@
         }
     }
 
+    bool are_layouts_different = (_file_layout != tensor.info()->data_layout());
+
     // Validate tensor ranks
     ARM_COMPUTE_ERROR_ON_MSG(shape.size() != tensor_shape.num_dimensions(), "Tensor ranks mismatch");
 
+    // Set permutation parameters if needed
+    TensorShape                    permuted_shape = tensor_shape;
+    arm_compute::PermutationVector perm;
+    if(are_layouts_different)
+    {
+        std::tie(permuted_shape, perm) = compute_permutation_paramaters(tensor_shape, tensor.info()->data_layout());
+    }
+
     // Validate shapes
     for(size_t i = 0; i < shape.size(); ++i)
     {
-        ARM_COMPUTE_ERROR_ON_MSG(tensor_shape[i] != shape[i], "Tensor dimensions mismatch");
+        ARM_COMPUTE_ERROR_ON_MSG(permuted_shape[i] != shape[i], "Tensor dimensions mismatch");
     }
 
-    // Read data
-    if(tensor.info()->padding().empty() && (dynamic_cast<SubTensor *>(&tensor) == nullptr))
+    // Read the data into the tensor, copying directly when no permutation is needed
+    if(!are_layouts_different || perm.num_dimensions() <= 2)
     {
-        // If tensor has no padding read directly from stream.
-        stream.read(reinterpret_cast<char *>(tensor.buffer()), tensor.info()->total_size());
+        // Read data
+        if(tensor.info()->padding().empty() && (dynamic_cast<SubTensor *>(&tensor) == nullptr))
+        {
+            // If tensor has no padding read directly from stream.
+            stream.read(reinterpret_cast<char *>(tensor.buffer()), tensor.info()->total_size());
+        }
+        else
+        {
+            // If tensor has padding accessing tensor elements through execution window.
+            Window window;
+            window.use_tensor_dimensions(tensor_shape);
+
+            execute_window_loop(window, [&](const Coordinates & id)
+            {
+                stream.read(reinterpret_cast<char *>(tensor.ptr_to_element(id)), tensor.info()->element_size());
+            });
+        }
     }
     else
     {
         // If tensor has padding accessing tensor elements through execution window.
         Window window;
-        window.use_tensor_dimensions(tensor_shape);
+        window.use_tensor_dimensions(permuted_shape);
 
         execute_window_loop(window, [&](const Coordinates & id)
         {
-            stream.read(reinterpret_cast<char *>(tensor.ptr_to_element(id)), tensor.info()->element_size());
+            Coordinates coords(id);
+            arm_compute::permute(coords, perm);
+            stream.read(reinterpret_cast<char *>(tensor.ptr_to_element(coords)), tensor.info()->element_size());
         });
     }
     return true;
diff --git a/utils/GraphUtils.h b/utils/GraphUtils.h
index cc6f404..5977083 100644
--- a/utils/GraphUtils.h
+++ b/utils/GraphUtils.h
@@ -25,9 +25,11 @@
 #define __ARM_COMPUTE_GRAPH_UTILS_H__
 
 #include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/utils/misc/Utility.h"
 #include "arm_compute/graph/Graph.h"
 #include "arm_compute/graph/ITensorAccessor.h"
 #include "arm_compute/graph/Types.h"
+#include "arm_compute/runtime/Tensor.h"
 
 #include <array>
 #include <random>
@@ -42,7 +44,12 @@
 class IPreprocessor
 {
 public:
-    virtual ~IPreprocessor()                 = default;
+    /** Default destructor. */
+    virtual ~IPreprocessor() = default;
+    /** Preprocess the given tensor.
+     *
+     * @param[in] tensor Tensor to preprocess.
+     */
     virtual void preprocess(ITensor &tensor) = 0;
 };
 
@@ -112,6 +119,37 @@
     unsigned int _maximum;
 };
 
+/** NumPy accessor class */
+class NumPyAccessor final : public graph::ITensorAccessor
+{
+public:
+    /** Constructor
+     *
+     * @param[in]  npy_path      Path to npy file.
+     * @param[in]  shape         Shape of the numpy tensor data.
+     * @param[in]  data_type     DataType of the numpy tensor data.
+     * @param[out] output_stream (Optional) Output stream
+     */
+    NumPyAccessor(std::string npy_path, TensorShape shape, DataType data_type, std::ostream &output_stream = std::cout);
+    /** Allow instances of this class to be move constructed */
+    NumPyAccessor(NumPyAccessor &&) = default;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NumPyAccessor(const NumPyAccessor &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NumPyAccessor &operator=(const NumPyAccessor &) = delete;
+
+    // Inherited methods overriden:
+    bool access_tensor(ITensor &tensor) override;
+
+private:
+    template <typename T>
+    void access_numpy_tensor(ITensor &tensor);
+
+    Tensor            _npy_tensor;
+    const std::string _filename;
+    std::ostream     &_output_stream;
+};
+
 /** PPM accessor class */
 class PPMAccessor final : public graph::ITensorAccessor
 {
@@ -196,9 +234,10 @@
 public:
     /** Default Constructor
      *
-     * @param filename Binary file name
+     * @param[in] filename    Binary file name
+     * @param[in] file_layout (Optional) Layout of the numpy tensor data. Defaults to NCHW
      */
-    NumPyBinLoader(std::string filename);
+    NumPyBinLoader(std::string filename, DataLayout file_layout = DataLayout::NCHW);
     /** Allows instances to move constructed */
     NumPyBinLoader(NumPyBinLoader &&) = default;
 
@@ -207,6 +246,7 @@
 
 private:
     const std::string _filename;
+    const DataLayout  _file_layout;
 };
 
 /** Generates appropriate random accessor
@@ -226,12 +266,15 @@
  *
  * @note If path is empty will generate a DummyAccessor else will generate a NumPyBinLoader
  *
- * @param[in] path      Path to the data files
- * @param[in] data_file Relative path to the data files from path
+ * @param[in] path        Path to the data files
+ * @param[in] data_file   Relative path to the data files from path
+ * @param[in] file_layout (Optional) Layout of file. Defaults to NCHW
  *
  * @return An appropriate tensor accessor
  */
-inline std::unique_ptr<graph::ITensorAccessor> get_weights_accessor(const std::string &path, const std::string &data_file)
+inline std::unique_ptr<graph::ITensorAccessor> get_weights_accessor(const std::string &path,
+                                                                    const std::string &data_file,
+                                                                    DataLayout         file_layout = DataLayout::NCHW)
 {
     if(path.empty())
     {
@@ -239,7 +282,7 @@
     }
     else
     {
-        return arm_compute::support::cpp14::make_unique<NumPyBinLoader>(path + data_file);
+        return arm_compute::support::cpp14::make_unique<NumPyBinLoader>(path + data_file, file_layout);
     }
 }
 
@@ -263,27 +306,14 @@
     }
     else
     {
-        return arm_compute::support::cpp14::make_unique<PPMAccessor>(ppm_path, bgr, std::move(preprocessor));
-    }
-}
-
-/** Utility function to return the TargetHint
- *
- * @param[in] target Integer value which expresses the selected target. Must be 0 for NEON, 1 for OpenCL or 2 for OpenCL with Tuner
- *
- * @return the TargetHint
- */
-inline graph::TargetHint set_target_hint(int target)
-{
-    ARM_COMPUTE_ERROR_ON_MSG(target > 2, "Invalid target. Target must be 0 (NEON), 1 (OpenCL) or 2 (OpenCL with Tuner)");
-    if((target == 1 || target == 2) && graph::Graph::opencl_is_available())
-    {
-        // If type of target is OpenCL, check if OpenCL is available and initialize the scheduler
-        return graph::TargetHint::OPENCL;
-    }
-    else
-    {
-        return graph::TargetHint::NEON;
+        if(arm_compute::utility::endswith(ppm_path, ".npy"))
+        {
+            return arm_compute::support::cpp14::make_unique<NumPyBinLoader>(ppm_path);
+        }
+        else
+        {
+            return arm_compute::support::cpp14::make_unique<PPMAccessor>(ppm_path, bgr, std::move(preprocessor));
+        }
     }
 }
 
@@ -308,6 +338,51 @@
         return arm_compute::support::cpp14::make_unique<TopNPredictionsAccessor>(labels_path, top_n, output_stream);
     }
 }
+/** Generates appropriate npy output accessor according to the specified npy_path
+ *
+ * @note If npy_path is empty will generate a DummyAccessor else will generate a NumPyAccessor
+ *
+ * @param[in]  npy_path      Path to npy file.
+ * @param[in]  shape         Shape of the numpy tensor data.
+ * @param[in]  data_type     DataType of the numpy tensor data.
+ * @param[out] output_stream (Optional) Output stream
+ *
+ * @return An appropriate tensor accessor
+ */
+inline std::unique_ptr<graph::ITensorAccessor> get_npy_output_accessor(const std::string &npy_path, TensorShape shape, DataType data_type, std::ostream &output_stream = std::cout)
+{
+    if(npy_path.empty())
+    {
+        return arm_compute::support::cpp14::make_unique<DummyAccessor>(0);
+    }
+    else
+    {
+        return arm_compute::support::cpp14::make_unique<NumPyAccessor>(npy_path, shape, data_type, output_stream);
+    }
+}
+
+/** Utility function to return the TargetHint
+ *
+ * @param[in] target Integer value which expresses the selected target. Must be 0 for NEON, 1 for OpenCL, 2 for OpenCL with Tuner or 3 for GLES
+ *
+ * @return the TargetHint
+ */
+inline graph::Target set_target_hint(int target)
+{
+    ARM_COMPUTE_ERROR_ON_MSG(target > 3, "Invalid target. Target must be 0 (NEON), 1 (OpenCL), 2 (OpenCL + Tuner), 3 (GLES)");
+    if((target == 1 || target == 2))
+    {
+        return graph::Target::CL;
+    }
+    else if(target == 3)
+    {
+        return graph::Target::GC;
+    }
+    else
+    {
+        return graph::Target::NEON;
+    }
+}
 } // namespace graph_utils
 } // namespace arm_compute
 
diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h
index 313fad1..be92489 100644
--- a/utils/TypePrinter.h
+++ b/utils/TypePrinter.h
@@ -27,6 +27,7 @@
 #include "arm_compute/core/CL/CLTypes.h"
 #include "arm_compute/core/Dimensions.h"
 #include "arm_compute/core/Error.h"
+#include "arm_compute/core/GPUTarget.h"
 #include "arm_compute/core/HOGInfo.h"
 #include "arm_compute/core/Size2D.h"
 #include "arm_compute/core/Strides.h"
@@ -41,7 +42,13 @@
 
 namespace arm_compute
 {
-/** Formatted output of the Dimensions type. */
+/** Formatted output of the Dimensions type.
+ *
+ * @param[out] os         Output stream.
+ * @param[in]  dimensions Type to output.
+ *
+ * @return Modified output stream.
+ */
 template <typename T>
 inline ::std::ostream &operator<<(::std::ostream &os, const Dimensions<T> &dimensions)
 {
@@ -58,7 +65,13 @@
     return os;
 }
 
-/** Formatted output of the NonLinearFilterFunction type. */
+/** Formatted output of the NonLinearFilterFunction type.
+ *
+ * @param[out] os       Output stream.
+ * @param[in]  function Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const NonLinearFilterFunction &function)
 {
     switch(function)
@@ -79,6 +92,12 @@
     return os;
 }
 
+/** Formatted output of the NonLinearFilterFunction type.
+ *
+ * @param[in] function Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const NonLinearFilterFunction &function)
 {
     std::stringstream str;
@@ -86,7 +105,13 @@
     return str.str();
 }
 
-/** Formatted output of the MatrixPattern type. */
+/** Formatted output of the MatrixPattern type.
+ *
+ * @param[out] os      Output stream.
+ * @param[in]  pattern Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const MatrixPattern &pattern)
 {
     switch(pattern)
@@ -110,6 +135,12 @@
     return os;
 }
 
+/** Formatted output of the MatrixPattern type.
+ *
+ * @param[in] pattern Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const MatrixPattern &pattern)
 {
     std::stringstream str;
@@ -117,7 +148,13 @@
     return str.str();
 }
 
-/** Formatted output of the RoundingPolicy type. */
+/** Formatted output of the RoundingPolicy type.
+ *
+ * @param[out] os              Output stream.
+ * @param[in]  rounding_policy Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const RoundingPolicy &rounding_policy)
 {
     switch(rounding_policy)
@@ -138,7 +175,13 @@
     return os;
 }
 
-/** Formatted output of the WeightsInfo type. */
+/** Formatted output of the WeightsInfo type.
+ *
+ * @param[out] os           Output stream.
+ * @param[in]  weights_info Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const WeightsInfo &weights_info)
 {
     os << weights_info.are_reshaped() << ";";
@@ -147,14 +190,26 @@
     return os;
 }
 
-/** Formatted output of the ROIPoolingInfo type. */
+/** Formatted output of the ROIPoolingInfo type.
+ *
+ * @param[out] os        Output stream.
+ * @param[in]  pool_info Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const ROIPoolingLayerInfo &pool_info)
 {
     os << pool_info.pooled_width() << "x" << pool_info.pooled_height() << "~" << pool_info.spatial_scale();
     return os;
 }
 
-/** Formatted output of the QuantizationInfo type. */
+/** Formatted output of the QuantizationInfo type.
+ *
+ * @param[out] os                Output stream.
+ * @param[in]  quantization_info Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const QuantizationInfo &quantization_info)
 {
     os << "Scale:" << quantization_info.scale << "~"
@@ -162,6 +217,12 @@
     return os;
 }
 
+/** Formatted output of the QuantizationInfo type.
+ *
+ * @param[in] quantization_info Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const QuantizationInfo &quantization_info)
 {
     std::stringstream str;
@@ -169,6 +230,13 @@
     return str.str();
 }
 
+/** Formatted output of the FixedPointOp type.
+ *
+ * @param[out] os Output stream.
+ * @param[in]  op Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const FixedPointOp &op)
 {
     switch(op)
@@ -201,6 +269,12 @@
     return os;
 }
 
+/** Formatted output of the FixedPointOp type.
+ *
+ * @param[in] op Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const FixedPointOp &op)
 {
     std::stringstream str;
@@ -208,7 +282,13 @@
     return str.str();
 }
 
-/** Formatted output of the activation function type. */
+/** Formatted output of the activation function type.
+ *
+ * @param[out] os           Output stream.
+ * @param[in]  act_function Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const ActivationLayerInfo::ActivationFunction &act_function)
 {
     switch(act_function)
@@ -253,13 +333,28 @@
     return os;
 }
 
+/** Formatted output of the activation function info type.
+ *
+ * @param[in] info Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const arm_compute::ActivationLayerInfo &info)
 {
     std::stringstream str;
-    str << info.activation();
+    if(info.enabled())
+    {
+        str << info.activation();
+    }
     return str.str();
 }
 
+/** Formatted output of the activation function type.
+ *
+ * @param[in] function Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const arm_compute::ActivationLayerInfo::ActivationFunction &function)
 {
     std::stringstream str;
@@ -267,7 +362,13 @@
     return str.str();
 }
 
-/** Formatted output of the NormType type. */
+/** Formatted output of the NormType type.
+ *
+ * @param[out] os        Output stream.
+ * @param[in]  norm_type Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const NormType &norm_type)
 {
     switch(norm_type)
@@ -288,6 +389,12 @@
     return os;
 }
 
+/** Formatted output of @ref NormalizationLayerInfo.
+ *
+ * @param[in] info Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const arm_compute::NormalizationLayerInfo &info)
 {
     std::stringstream str;
@@ -295,14 +402,26 @@
     return str.str();
 }
 
-/** Formatted output of @ref NormalizationLayerInfo. */
+/** Formatted output of @ref NormalizationLayerInfo.
+ *
+ * @param[out] os   Output stream.
+ * @param[in]  info Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const NormalizationLayerInfo &info)
 {
     os << info.type() << ":NormSize=" << info.norm_size();
     return os;
 }
 
-/** Formatted output of the PoolingType type. */
+/** Formatted output of the PoolingType type.
+ *
+ * @param[out] os        Output stream.
+ * @param[in]  pool_type Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const PoolingType &pool_type)
 {
     switch(pool_type)
@@ -323,7 +442,13 @@
     return os;
 }
 
-/** Formatted output of @ref PoolingLayerInfo. */
+/** Formatted output of @ref PoolingLayerInfo.
+ *
+ * @param[out] os   Output stream.
+ * @param[in]  info Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const PoolingLayerInfo &info)
 {
     os << info.pool_type();
@@ -331,6 +456,12 @@
     return os;
 }
 
+/** Formatted output of @ref RoundingPolicy.
+ *
+ * @param[in] rounding_policy Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const RoundingPolicy &rounding_policy)
 {
     std::stringstream str;
@@ -338,7 +469,53 @@
     return str.str();
 }
 
-/** Formatted output of the DataType type. */
+/** Formatted output of the DataLayout type.
+ *
+ * @param[out] os          Output stream.
+ * @param[in]  data_layout Type to output.
+ *
+ * @return Modified output stream.
+ */
+inline ::std::ostream &operator<<(::std::ostream &os, const DataLayout &data_layout)
+{
+    switch(data_layout)
+    {
+        case DataLayout::UNKNOWN:
+            os << "UNKNOWN";
+            break;
+        case DataLayout::NHWC:
+            os << "NHWC";
+            break;
+        case DataLayout::NCHW:
+            os << "NCHW";
+            break;
+        default:
+            ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
+    }
+
+    return os;
+}
+
+/** Formatted output of the DataLayout type.
+ *
+ * @param[in] data_layout Type to output.
+ *
+ * @return Formatted string.
+ */
+inline std::string to_string(const arm_compute::DataLayout &data_layout)
+{
+    std::stringstream str;
+    str << data_layout;
+    return str.str();
+}
+
+/** Formatted output of the DataType type.
+ *
+ * @param[out] os        Output stream.
+ * @param[in]  data_type Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const DataType &data_type)
 {
     switch(data_type)
@@ -398,6 +575,12 @@
     return os;
 }
 
+/** Formatted output of the DataType type.
+ *
+ * @param[in] data_type Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const arm_compute::DataType &data_type)
 {
     std::stringstream str;
@@ -405,7 +588,13 @@
     return str.str();
 }
 
-/** Formatted output of the Format type. */
+/** Formatted output of the Format type.
+ *
+ * @param[out] os     Output stream.
+ * @param[in]  format Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const Format &format)
 {
     switch(format)
@@ -468,6 +657,12 @@
     return os;
 }
 
+/** Formatted output of the Format type.
+ *
+ * @param[in] format Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const Format &format)
 {
     std::stringstream str;
@@ -475,7 +670,13 @@
     return str.str();
 }
 
-/** Formatted output of the Channel type. */
+/** Formatted output of the Channel type.
+ *
+ * @param[out] os      Output stream.
+ * @param[in]  channel Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const Channel &channel)
 {
     switch(channel)
@@ -523,6 +724,12 @@
     return os;
 }
 
+/** Formatted output of the Channel type.
+ *
+ * @param[in] channel Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const Channel &channel)
 {
     std::stringstream str;
@@ -530,7 +737,13 @@
     return str.str();
 }
 
-/** Formatted output of the BorderMode type. */
+/** Formatted output of the BorderMode type.
+ *
+ * @param[out] os   Output stream.
+ * @param[in]  mode Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const BorderMode &mode)
 {
     switch(mode)
@@ -551,7 +764,13 @@
     return os;
 }
 
-/** Formatted output of the BorderSize type. */
+/** Formatted output of the BorderSize type.
+ *
+ * @param[out] os     Output stream.
+ * @param[in]  border Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const BorderSize &border)
 {
     os << border.top << ","
@@ -562,7 +781,13 @@
     return os;
 }
 
-/** Formatted output of the InterpolationPolicy type. */
+/** Formatted output of the InterpolationPolicy type.
+ *
+ * @param[out] os     Output stream.
+ * @param[in]  policy Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const InterpolationPolicy &policy)
 {
     switch(policy)
@@ -583,7 +808,13 @@
     return os;
 }
 
-/** Formatted output of the SamplingPolicy type. */
+/** Formatted output of the SamplingPolicy type.
+ *
+ * @param[out] os     Output stream.
+ * @param[in]  policy Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const SamplingPolicy &policy)
 {
     switch(policy)
@@ -601,7 +832,12 @@
     return os;
 }
 
-/** Formatted output of the TensorInfo type. */
+/** Formatted output of the TensorInfo type.
+ *
+ * @param[in] info Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const TensorInfo &info)
 {
     std::stringstream str;
@@ -612,6 +848,12 @@
     return str.str();
 }
 
+/** Formatted output of the Dimensions type.
+ *
+ * @param[in] dimensions Type to output.
+ *
+ * @return Formatted string.
+ */
 template <typename T>
 inline std::string to_string(const Dimensions<T> &dimensions)
 {
@@ -620,6 +862,12 @@
     return str.str();
 }
 
+/** Formatted output of the Strides type.
+ *
+ * @param[in] stride Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const Strides &stride)
 {
     std::stringstream str;
@@ -627,7 +875,12 @@
     return str.str();
 }
 
-/** Formatted output of the TensorShape type. */
+/** Formatted output of the TensorShape type.
+ *
+ * @param[in] shape Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const TensorShape &shape)
 {
     std::stringstream str;
@@ -635,7 +888,12 @@
     return str.str();
 }
 
-/** Formatted output of the Coordinates type. */
+/** Formatted output of the Coordinates type.
+ *
+ * @param[in] coord Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const Coordinates &coord)
 {
     std::stringstream str;
@@ -643,7 +901,13 @@
     return str.str();
 }
 
-/** Formatted output of the Rectangle type. */
+/** Formatted output of the Rectangle type.
+ *
+ * @param[out] os   Output stream.
+ * @param[in]  rect Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const Rectangle &rect)
 {
     os << rect.width << "x" << rect.height;
@@ -652,7 +916,13 @@
     return os;
 }
 
-/** Formatted output of the PadStridInfo type. */
+/** Formatted output of the PadStrideInfo type.
+ *
+ * @param[out] os              Output stream.
+ * @param[in]  pad_stride_info Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const PadStrideInfo &pad_stride_info)
 {
     os << pad_stride_info.stride().first << "," << pad_stride_info.stride().second;
@@ -663,6 +933,12 @@
     return os;
 }
 
+/** Formatted output of the PadStrideInfo type.
+ *
+ * @param[in] pad_stride_info Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const PadStrideInfo &pad_stride_info)
 {
     std::stringstream str;
@@ -670,6 +946,12 @@
     return str.str();
 }
 
+/** Formatted output of the BorderMode type.
+ *
+ * @param[in] mode Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const BorderMode &mode)
 {
     std::stringstream str;
@@ -677,6 +959,12 @@
     return str.str();
 }
 
+/** Formatted output of the BorderSize type.
+ *
+ * @param[in] border Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const BorderSize &border)
 {
     std::stringstream str;
@@ -684,6 +972,12 @@
     return str.str();
 }
 
+/** Formatted output of the InterpolationPolicy type.
+ *
+ * @param[in] policy Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const InterpolationPolicy &policy)
 {
     std::stringstream str;
@@ -691,6 +985,12 @@
     return str.str();
 }
 
+/** Formatted output of the SamplingPolicy type.
+ *
+ * @param[in] policy Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const SamplingPolicy &policy)
 {
     std::stringstream str;
@@ -698,7 +998,13 @@
     return str.str();
 }
 
-/** Formatted output of the ConversionPolicy type. */
+/** Formatted output of the ConvertPolicy type.
+ *
+ * @param[out] os     Output stream.
+ * @param[in]  policy Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const ConvertPolicy &policy)
 {
     switch(policy)
@@ -723,7 +1029,13 @@
     return str.str();
 }
 
-/** Formatted output of the Reduction Operations. */
+/** Formatted output of the Reduction Operations.
+ *
+ * @param[out] os Output stream.
+ * @param[in]  op Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const ReductionOperation &op)
 {
     switch(op)
@@ -738,6 +1050,12 @@
     return os;
 }
 
+/** Formatted output of the Reduction Operations.
+ *
+ * @param[in] op Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const ReductionOperation &op)
 {
     std::stringstream str;
@@ -745,6 +1063,12 @@
     return str.str();
 }
 
+/** Formatted output of the Norm Type.
+ *
+ * @param[in] type Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const NormType &type)
 {
     std::stringstream str;
@@ -752,6 +1076,12 @@
     return str.str();
 }
 
+/** Formatted output of the Pooling Type.
+ *
+ * @param[in] type Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const PoolingType &type)
 {
     std::stringstream str;
@@ -759,6 +1089,12 @@
     return str.str();
 }
 
+/** Formatted output of the Pooling Layer Info.
+ *
+ * @param[in] info Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const PoolingLayerInfo &info)
 {
     std::stringstream str;
@@ -774,7 +1110,13 @@
     return str.str();
 }
 
-/** Formatted output of the KeyPoint type. */
+/** Formatted output of the KeyPoint type.
+ *
+ * @param[out] os    Output stream
+ * @param[in]  point Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const KeyPoint &point)
 {
     os << "{x=" << point.x << ","
@@ -788,7 +1130,13 @@
     return os;
 }
 
-/** Formatted output of the PhaseType type. */
+/** Formatted output of the PhaseType type.
+ *
+ * @param[out] os         Output stream
+ * @param[in]  phase_type Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const PhaseType &phase_type)
 {
     switch(phase_type)
@@ -806,6 +1154,12 @@
     return os;
 }
 
+/** Formatted output of the PhaseType type.
+ *
+ * @param[in] type Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const arm_compute::PhaseType &type)
 {
     std::stringstream str;
@@ -813,7 +1167,13 @@
     return str.str();
 }
 
-/** Formatted output of the MagnitudeType type. */
+/** Formatted output of the MagnitudeType type.
+ *
+ * @param[out] os             Output stream
+ * @param[in]  magnitude_type Type to output.
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const MagnitudeType &magnitude_type)
 {
     switch(magnitude_type)
@@ -831,6 +1191,12 @@
     return os;
 }
 
+/** Formatted output of the MagnitudeType type.
+ *
+ * @param[in] type Type to output.
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const arm_compute::MagnitudeType &type)
 {
     std::stringstream str;
@@ -838,7 +1204,13 @@
     return str.str();
 }
 
-/** Formatted output of the GradientDimension type. */
+/** Formatted output of the GradientDimension type.
+ *
+ * @param[out] os  Output stream
+ * @param[in]  dim Type to output
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const GradientDimension &dim)
 {
     switch(dim)
@@ -859,6 +1231,12 @@
     return os;
 }
 
+/** Formatted output of the GradientDimension type.
+ *
+ * @param[in] type Type to output
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const arm_compute::GradientDimension &type)
 {
     std::stringstream str;
@@ -866,7 +1244,13 @@
     return str.str();
 }
 
-/** Formatted output of the HOGNormType type. */
+/** Formatted output of the HOGNormType type.
+ *
+ * @param[out] os        Output stream
+ * @param[in]  norm_type Type to output
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const HOGNormType &norm_type)
 {
     switch(norm_type)
@@ -887,6 +1271,12 @@
     return os;
 }
 
+/** Formatted output of the HOGNormType type.
+ *
+ * @param[in] type Type to output
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const HOGNormType &type)
 {
     std::stringstream str;
@@ -894,7 +1284,13 @@
     return str.str();
 }
 
-/** Formatted output of the Size2D type. */
+/** Formatted output of the Size2D type.
+ *
+ * @param[out] os   Output stream
+ * @param[in]  size Type to output
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const Size2D &size)
 {
     os << size.width << "x" << size.height;
@@ -902,6 +1298,12 @@
     return os;
 }
 
+/** Formatted output of the Size2D type.
+ *
+ * @param[in] type Type to output
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const Size2D &type)
 {
     std::stringstream str;
@@ -909,7 +1311,13 @@
     return str.str();
 }
 
-/** Formatted output of the Size2D type. */
+/** Formatted output of the HOGInfo type.
+ *
+ * @param[out] os       Output stream
+ * @param[in]  hog_info Type to output
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const HOGInfo &hog_info)
 {
     os << "{CellSize=" << hog_info.cell_size() << ","
@@ -924,7 +1332,12 @@
     return os;
 }
 
-/** Formatted output of the HOGInfo type. */
+/** Formatted output of the HOGInfo type.
+ *
+ * @param[in] type Type to output
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const HOGInfo &type)
 {
     std::stringstream str;
@@ -932,6 +1345,13 @@
     return str.str();
 }
 
+/** Formatted output of the ConvolutionMethod type.
+ *
+ * @param[out] os          Output stream
+ * @param[in]  conv_method Type to output
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const ConvolutionMethod &conv_method)
 {
     switch(conv_method)
@@ -952,6 +1372,12 @@
     return os;
 }
 
+/** Formatted output of the ConvolutionMethod type.
+ *
+ * @param[in] conv_method Type to output
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const ConvolutionMethod &conv_method)
 {
     std::stringstream str;
@@ -959,6 +1385,13 @@
     return str.str();
 }
 
+/** Formatted output of the GPUTarget type.
+ *
+ * @param[out] os         Output stream
+ * @param[in]  gpu_target Type to output
+ *
+ * @return Modified output stream.
+ */
 inline ::std::ostream &operator<<(::std::ostream &os, const GPUTarget &gpu_target)
 {
     switch(gpu_target)
@@ -981,8 +1414,29 @@
         case GPUTarget::T800:
             os << "T800";
             break;
-        case GPUTarget::G70:
-            os << "G70";
+        case GPUTarget::G71:
+            os << "G71";
+            break;
+        case GPUTarget::G72:
+            os << "G72";
+            break;
+        case GPUTarget::G51:
+            os << "G51";
+            break;
+        case GPUTarget::G51BIG:
+            os << "G51BIG";
+            break;
+        case GPUTarget::G51LIT:
+            os << "G51LIT";
+            break;
+        case GPUTarget::TNOX:
+            os << "TNOX";
+            break;
+        case GPUTarget::TTRX:
+            os << "TTRX";
+            break;
+        case GPUTarget::TBOX:
+            os << "TBOX";
             break;
         default:
             ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
@@ -991,11 +1445,107 @@
     return os;
 }
 
+/** Formatted output of the GPUTarget type.
+ *
+ * @param[in] gpu_target Type to output
+ *
+ * @return Formatted string.
+ */
 inline std::string to_string(const GPUTarget &gpu_target)
 {
     std::stringstream str;
     str << gpu_target;
     return str.str();
 }
+
+/** Formatted output of the DetectionWindow type.
+ *
+ * @param[out] os               Output stream
+ * @param[in]  detection_window Type to output
+ *
+ * @return Modified output stream.
+ */
+inline ::std::ostream &operator<<(::std::ostream &os, const DetectionWindow &detection_window)
+{
+    os << "{x=" << detection_window.x << ","
+       << "y=" << detection_window.y << ","
+       << "width=" << detection_window.width << ","
+       << "height=" << detection_window.height << ","
+       << "idx_class=" << detection_window.idx_class << ","
+       << "score=" << detection_window.score << "}";
+
+    return os;
+}
+
+/** Formatted output of the DetectionWindow type.
+ *
+ * @param[in] detection_window Type to output
+ *
+ * @return Formatted string.
+ */
+inline std::string to_string(const DetectionWindow &detection_window)
+{
+    std::stringstream str;
+    str << detection_window;
+    return str.str();
+}
+
+/** Formatted output of the Termination type.
+ *
+ * @param[out] os          Output stream
+ * @param[in]  termination Type to output
+ *
+ * @return Modified output stream.
+ */
+inline ::std::ostream &operator<<(::std::ostream &os, const Termination &termination)
+{
+    switch(termination)
+    {
+        case Termination::TERM_CRITERIA_EPSILON:
+            os << "TERM_CRITERIA_EPSILON";
+            break;
+        case Termination::TERM_CRITERIA_ITERATIONS:
+            os << "TERM_CRITERIA_ITERATIONS";
+            break;
+        case Termination::TERM_CRITERIA_BOTH:
+            os << "TERM_CRITERIA_BOTH";
+            break;
+        default:
+            ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
+    }
+
+    return os;
+}
+
+/** Formatted output of the Termination type.
+ *
+ * @param[in] termination Type to output
+ *
+ * @return Formatted string.
+ */
+inline std::string to_string(const Termination &termination)
+{
+    std::stringstream str;
+    str << termination;
+    return str.str();
+}
+
+/** Formatted output of the WinogradInfo type. */
+inline ::std::ostream &operator<<(::std::ostream &os, const WinogradInfo &info)
+{
+    os << "{OutputTileSize=" << info.output_tile_size << ","
+       << "KernelSize=" << info.kernel_size << ","
+       << "PadStride=" << info.convolution_info << ","
+       << "OutputDataLayout=" << info.output_data_layout << "}";
+
+    return os;
+}
+
+inline std::string to_string(const WinogradInfo &type)
+{
+    std::stringstream str;
+    str << type;
+    return str.str();
+}
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_TEST_TYPE_PRINTER_H__ */
diff --git a/utils/Utils.cpp b/utils/Utils.cpp
index 8a2d118..a5c6a95 100644
--- a/utils/Utils.cpp
+++ b/utils/Utils.cpp
@@ -67,16 +67,16 @@
 } // namespace
 
 #ifndef BENCHMARK_EXAMPLES
-int run_example(int argc, char **argv, Example &example)
+int run_example(int argc, char **argv, std::unique_ptr<Example> example)
 {
     std::cout << "\n"
               << argv[0] << "\n\n";
 
     try
     {
-        example.do_setup(argc, argv);
-        example.do_run();
-        example.do_teardown();
+        example->do_setup(argc, argv);
+        example->do_run();
+        example->do_teardown();
 
         std::cout << "\nTest passed\n";
         return 0;
diff --git a/utils/Utils.h b/utils/Utils.h
index 4cce39f..f6f59b0 100644
--- a/utils/Utils.h
+++ b/utils/Utils.h
@@ -62,8 +62,15 @@
 class Example
 {
 public:
+    /** Setup the example.
+     *
+     * @param[in] argc Argument count.
+     * @param[in] argv Argument values.
+     */
     virtual void do_setup(int argc, char **argv) {};
+    /** Run the example. */
     virtual void do_run() {};
+    /** Teardown the example. */
     virtual void do_teardown() {};
 
     /** Default destructor. */
@@ -76,13 +83,12 @@
  * @param[in] argv    Command line arguments
  * @param[in] example Example to run
  */
-int run_example(int argc, char **argv, Example &example);
+int run_example(int argc, char **argv, std::unique_ptr<Example> example);
 
 template <typename T>
 int run_example(int argc, char **argv)
 {
-    T example;
-    return run_example(argc, argv, example);
+    return run_example(argc, argv, support::cpp14::make_unique<T>());
 }
 
 /** Draw a RGB rectangular window for the detected object
@@ -400,7 +406,14 @@
     {
         ARM_COMPUTE_ERROR_ON(!is_open());
         ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&tensor, 1, DataType::U8, DataType::F32);
-        ARM_COMPUTE_ERROR_ON(tensor.info()->dimension(0) != _width || tensor.info()->dimension(1) != _height || tensor.info()->dimension(2) != 3);
+
+        const DataLayout  data_layout  = tensor.info()->data_layout();
+        const TensorShape tensor_shape = tensor.info()->tensor_shape();
+
+        ARM_COMPUTE_UNUSED(tensor_shape);
+        ARM_COMPUTE_ERROR_ON(tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH)] != _width);
+        ARM_COMPUTE_ERROR_ON(tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT)] != _height);
+        ARM_COMPUTE_ERROR_ON(tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL)] != 3);
 
         try
         {
@@ -417,11 +430,25 @@
                                      "Not enough data in file");
             ARM_COMPUTE_UNUSED(end_position);
 
+            // Stride across channels
+            size_t stride_z = 0;
+
             // Iterate through every pixel of the image
             arm_compute::Window window;
-            window.set(arm_compute::Window::DimX, arm_compute::Window::Dimension(0, _width, 1));
-            window.set(arm_compute::Window::DimY, arm_compute::Window::Dimension(0, _height, 1));
-            window.set(arm_compute::Window::DimZ, arm_compute::Window::Dimension(0, 1, 1));
+            if(data_layout == DataLayout::NCHW)
+            {
+                window.set(arm_compute::Window::DimX, arm_compute::Window::Dimension(0, _width, 1));
+                window.set(arm_compute::Window::DimY, arm_compute::Window::Dimension(0, _height, 1));
+                window.set(arm_compute::Window::DimZ, arm_compute::Window::Dimension(0, 1, 1));
+                stride_z = tensor.info()->strides_in_bytes()[2];
+            }
+            else
+            {
+                window.set(arm_compute::Window::DimX, arm_compute::Window::Dimension(0, 1, 1));
+                window.set(arm_compute::Window::DimY, arm_compute::Window::Dimension(0, _width, 1));
+                window.set(arm_compute::Window::DimZ, arm_compute::Window::Dimension(0, _height, 1));
+                stride_z = tensor.info()->strides_in_bytes()[0];
+            }
 
             arm_compute::Iterator out(&tensor, window);
 
@@ -429,8 +456,6 @@
             unsigned char green = 0;
             unsigned char blue  = 0;
 
-            size_t stride_z = tensor.info()->strides_in_bytes()[2];
-
             arm_compute::execute_window_loop(window, [&](const arm_compute::Coordinates & id)
             {
                 red   = _fs.get();
@@ -489,9 +514,11 @@
     unsigned int  _width, _height;
 };
 
+/** Numpy data loader */
 class NPYLoader
 {
 public:
+    /** Default constructor */
     NPYLoader()
         : _fs(), _shape(), _fortran_order(false), _typestring()
     {
@@ -857,11 +884,8 @@
     std::random_device rd;
     std::mt19937       gen(rd());
 
-    TensorShape shape(tensor.info()->dimension(0), tensor.info()->dimension(1));
-
     Window window;
-    window.set(Window::DimX, Window::Dimension(0, shape.x(), 1));
-    window.set(Window::DimY, Window::Dimension(0, shape.y(), 1));
+    window.use_tensor_dimensions(tensor.info()->tensor_shape());
 
     map(tensor, true);
 
@@ -900,6 +924,43 @@
  * @return The free memory in kB
  */
 uint64_t get_mem_free_from_meminfo();
+
+/** Compare two tensors element-wise.
+ *
+ * @param[in] tensor1 First tensor to be compared.
+ * @param[in] tensor2 Second tensor to be compared.
+ *
+ * @return The number of mismatches
+ */
+template <typename T>
+int compare_tensor(ITensor &tensor1, ITensor &tensor2)
+{
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(&tensor1, &tensor2);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(&tensor1, &tensor2);
+
+    int    num_mismatches = 0;
+    Window window;
+    window.use_tensor_dimensions(tensor1.info()->tensor_shape());
+
+    map(tensor1, true);
+    map(tensor2, true);
+    Iterator itensor1(&tensor1, window);
+    Iterator itensor2(&tensor2, window);
+
+    execute_window_loop(window, [&](const Coordinates & id)
+    {
+        if(std::abs(*reinterpret_cast<T *>(itensor1.ptr()) - *reinterpret_cast<T *>(itensor2.ptr())) > 0.00001)
+        {
+            ++num_mismatches;
+        }
+    },
+    itensor1, itensor2);
+
+    unmap(itensor1);
+    unmap(itensor2);
+
+    return num_mismatches;
+}
 } // namespace utils
 } // namespace arm_compute
 #endif /* __UTILS_UTILS_H__*/