arm_compute v18.08
diff --git a/examples/SConscript b/examples/SConscript
index c3576fb..098dd5b 100644
--- a/examples/SConscript
+++ b/examples/SConscript
@@ -36,22 +36,24 @@
     Import('arm_compute_a')
     Import('arm_compute_core_a')
     arm_compute_libs = [ arm_compute_a, arm_compute_core_a ]
+    arm_compute_graph_libs = arm_compute_libs # The graph library needs to be linked separately with --whole-archive
     arm_compute_dependency = arm_compute_a
     graph_dependency = [arm_compute_graph_a]
 else:
     Import('arm_compute_graph_so')
     Import('arm_compute_so')
-    arm_compute_libs = ["arm_compute_graph", "arm_compute", "arm_compute_core"]
+    arm_compute_libs = ["arm_compute", "arm_compute_core"]
+    arm_compute_graph_libs = [ "arm_compute_graph" ] + arm_compute_libs
     arm_compute_dependency = arm_compute_so
     graph_dependency = [arm_compute_graph_so]
 
 # Build graph examples
 graph_utils = examples_env.Object("../utils/GraphUtils.cpp")
+graph_utils += examples_env.Object("../utils/CommonGraphOptions.cpp")
 examples_libs = examples_env.get("LIBS",[])
 for file in Glob("./graph_*.cpp"):
     example = os.path.basename(os.path.splitext(str(file))[0])
     prog = None
-    arm_compute_graph_libs = arm_compute_libs
 
     if env['os'] in ['android', 'bare_metal'] or env['standalone']:
         prog = examples_env.Program(example, ["{}.cpp".format(example), utils, graph_utils], LIBS = examples_libs + arm_compute_graph_libs, LINKFLAGS=examples_env["LINKFLAGS"]+['-Wl,--whole-archive',graph_dependency,'-Wl,--no-whole-archive'])
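
Why the separate graph link group: a statically linked arm_compute_graph exports nothing the examples reference directly. Its backends announce themselves through static registrar objects, and the linker drops those object files from a static archive unless it is wrapped in --whole-archive, which is exactly what the Program() call above does. A simplified, self-contained illustration of the pattern (the real registrars live under src/graph/backends/ and differ in detail):

    #include <functional>
    #include <iostream>
    #include <map>
    #include <string>

    // Global registry mapping a backend name to its initializer.
    std::map<std::string, std::function<void()>> &registry()
    {
        static std::map<std::string, std::function<void()>> r;
        return r;
    }

    // A registrar whose constructor runs during static initialization.
    struct BackendRegistrar
    {
        BackendRegistrar(const std::string &name, std::function<void()> init)
        {
            registry().emplace(name, std::move(init));
        }
    };

    // Nothing references this object, so when it sits in a static archive the
    // linker discards its object file unless --whole-archive keeps it.
    static BackendRegistrar cl_registrar{ "CL", [] { std::cout << "CL backend registered\n"; } };

    int main()
    {
        for(auto &b : registry())
        {
            b.second(); // initialize every backend that survived linking
        }
        return 0;
    }
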
diff --git a/examples/cl_convolution.cpp b/examples/cl_convolution.cpp
index 24ad7c1..b15bbb6 100644
--- a/examples/cl_convolution.cpp
+++ b/examples/cl_convolution.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2018 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,6 +28,7 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/CLFunctions.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
+#include "utils/ImageLoader.h"
 #include "utils/Utils.h"
 
 using namespace arm_compute;
@@ -56,7 +57,7 @@
 class CLConvolutionExample : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    bool do_setup(int argc, char **argv) override
     {
         PPMLoader ppm;
 
@@ -94,6 +95,8 @@
             ppm.fill_image(src);
             output_filename = std::string(argv[1]) + "_out.ppm";
         }
+
+        return true;
     }
     void do_run() override
     {
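
Two changes recur in every example from here on: the PPM/NPY loaders now come from utils/ImageLoader.h instead of utils/Utils.h, and do_setup() returns bool so an example can decline to run after, say, printing a help screen (the graph examples below return false once --help has been handled). A minimal sketch of the resulting contract; the real harness lives in utils/Utils.h and its exact signatures may differ:

    // Sketch only: illustrates the bool do_setup() contract, not the
    // library's actual utils code.
    class Example
    {
    public:
        // Returning false means "setup declined to run", e.g. --help was printed.
        virtual bool do_setup(int, char **)
        {
            return true;
        }
        virtual void do_run()
        {
        }
        virtual void do_teardown()
        {
        }
        virtual ~Example() = default;
    };

    template <typename T>
    int run_example(int argc, char **argv)
    {
        T example{};
        if(example.do_setup(argc, argv)) // a false return skips do_run()
        {
            example.do_run();
        }
        example.do_teardown();
        return 0;
    }

    struct NullExample : Example
    {
    };

    int main(int argc, char **argv)
    {
        return run_example<NullExample>(argc, argv);
    }
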
diff --git a/examples/cl_events.cpp b/examples/cl_events.cpp
index ec02fc3..a9c508a 100644
--- a/examples/cl_events.cpp
+++ b/examples/cl_events.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,6 +28,7 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/CLFunctions.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
+#include "utils/ImageLoader.h"
 #include "utils/Utils.h"
 
 using namespace arm_compute;
@@ -36,7 +37,7 @@
 class CLEventsExample : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    bool do_setup(int argc, char **argv) override
     {
         /** [OpenCL events] **/
         PPMLoader     ppm;
@@ -83,6 +84,8 @@
             output_filename = std::string(argv[1]) + "_out.ppm";
         }
         /** [OpenCL events] **/
+
+        return true;
     }
     void do_run() override
     {
diff --git a/examples/cl_sgemm.cpp b/examples/cl_sgemm.cpp
index fa57885..805aec1 100644
--- a/examples/cl_sgemm.cpp
+++ b/examples/cl_sgemm.cpp
@@ -39,7 +39,7 @@
 class CLSGEMMExample : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    bool do_setup(int argc, char **argv) override
     {
         NPYLoader npy0, npy1, npy2;
         alpha = 1.0f;
@@ -164,6 +164,8 @@
 
         // Dummy run for CLTuner
         sgemm.run();
+
+        return true;
     }
     void do_run() override
     {
diff --git a/examples/gc_absdiff.cpp b/examples/gc_absdiff.cpp
index 32f946d..f534592 100644
--- a/examples/gc_absdiff.cpp
+++ b/examples/gc_absdiff.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,6 +29,7 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/GLES_COMPUTE/GCFunctions.h"
 #include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
+#include "utils/ImageLoader.h"
 #include "utils/Utils.h"
 
 using namespace arm_compute;
@@ -37,7 +38,7 @@
 class GCAbsDiffExample : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    bool do_setup(int argc, char **argv) override
     {
         PPMLoader ppm1, ppm2;
 
@@ -89,6 +90,8 @@
         {
             ppm2.fill_image(src2);
         }
+
+        return true;
     }
     void do_run() override
     {
diff --git a/examples/gc_dc.cpp b/examples/gc_dc.cpp
index 8b6f441..f3f1942 100644
--- a/examples/gc_dc.cpp
+++ b/examples/gc_dc.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,7 +36,7 @@
 class GCDCExample : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    bool do_setup(int argc, char **argv) override
     {
         ARM_COMPUTE_UNUSED(argc);
         ARM_COMPUTE_UNUSED(argv);
@@ -86,6 +86,8 @@
             *reinterpret_cast<half_float::half *>(it.ptr()) = half_float::half(1.f);
         });
         src.unmap();
+
+        return true;
     }
     void do_run() override
     {
diff --git a/examples/graph_alexnet.cpp b/examples/graph_alexnet.cpp
index 9e6d919..124f672 100644
--- a/examples/graph_alexnet.cpp
+++ b/examples/graph_alexnet.cpp
@@ -23,13 +23,10 @@
  */
 #include "arm_compute/graph.h"
 #include "support/ToolchainSupport.h"
+#include "utils/CommonGraphOptions.h"
 #include "utils/GraphUtils.h"
 #include "utils/Utils.h"
 
-#include <cstdlib>
-#include <iostream>
-#include <memory>
-
 using namespace arm_compute::utils;
 using namespace arm_compute::graph::frontend;
 using namespace arm_compute::graph_utils;
@@ -37,79 +34,63 @@
 /** Example demonstrating how to implement AlexNet's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments
  */
 class GraphAlexnetExample : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    GraphAlexnetExample()
+        : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "AlexNet")
     {
-        std::string data_path; /* Path to the trainable data */
-        std::string image;     /* Image data */
-        std::string label;     /* Label data */
+    }
+    bool do_setup(int argc, char **argv) override
+    {
+        // Parse arguments
+        cmd_parser.parse(argc, argv);
+
+        // Consume common parameters
+        common_params = consume_common_graph_parameters(common_opts);
+
+        // Return when help menu is requested
+        if(common_params.help)
+        {
+            cmd_parser.print_help(argv[0]);
+            return false;
+        }
+
+        // Set default layout if needed
+        if(!common_opts.data_layout->is_set() && common_params.target == Target::NEON)
+        {
+            common_params.data_layout = DataLayout::NCHW;
+        }
+
+        // Checks
+        ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph");
+
+        // Print parameter values
+        std::cout << common_params << std::endl;
+
+        // Get trainable parameters data path
+        std::string data_path = common_params.data_path;
 
         // Create a preprocessor object
         const std::array<float, 3> mean_rgb{ { 122.68f, 116.67f, 104.01f } };
         std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb);
 
-        // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
-        const int target      = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
-        Target    target_hint = set_target_hint(target);
+        // Create input descriptor
+        const TensorShape tensor_shape     = permute_shape(TensorShape(227U, 227U, 3U, 1U), DataLayout::NCHW, common_params.data_layout);
+        TensorDescriptor  input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout);
 
-        const bool        is_neon              = (target_hint == Target::NEON);
-        ConvolutionMethod convolution_5x5_hint = is_neon ? ConvolutionMethod::GEMM : ConvolutionMethod::DIRECT;
-        ConvolutionMethod convolution_3x3_hint = ConvolutionMethod::DEFAULT;
-        FastMathHint      fast_math_hint       = FastMathHint::DISABLED;
+        // Set weights trained layout
+        const DataLayout weights_layout = DataLayout::NCHW;
 
-        // Parse arguments
-        if(argc < 2)
-        {
-            // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 2)
-        {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 3)
-        {
-            data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No image provided: using random values\n\n";
-        }
-        else if(argc == 4)
-        {
-            data_path = argv[2];
-            image     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
-            std::cout << "No text file with labels provided: skipping output accessor\n\n";
-        }
-        else if(argc == 5)
-        {
-            data_path = argv[2];
-            image     = argv[3];
-            label     = argv[4];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
-            std::cout << "No fast math info provided: disabling fast math\n\n";
-        }
-        else
-        {
-            data_path      = argv[2];
-            image          = argv[3];
-            label          = argv[4];
-            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
-        }
-
-        graph << target_hint
-              << fast_math_hint
-              << InputLayer(TensorDescriptor(TensorShape(227U, 227U, 3U, 1U), DataType::F32),
-                            get_input_accessor(image, std::move(preprocessor)))
+        graph << common_params.target
+              << common_params.fast_math_hint
+              << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor)))
               // Layer 1
               << ConvolutionLayer(
                   11U, 11U, 96U,
-                  get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv1_b.npy"),
                   PadStrideInfo(4, 4, 0, 0))
               .set_name("conv1")
@@ -117,21 +98,19 @@
               << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)).set_name("norm1")
               << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0))).set_name("pool1")
               // Layer 2
-              << convolution_5x5_hint
               << ConvolutionLayer(
                   5U, 5U, 256U,
-                  get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_b.npy"),
                   PadStrideInfo(1, 1, 2, 2), 2)
               .set_name("conv2")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu2")
               << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)).set_name("norm2")
               << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0))).set_name("pool2")
-              << convolution_3x3_hint
               // Layer 3
               << ConvolutionLayer(
                   3U, 3U, 384U,
-                  get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv3_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv3_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv3_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv3")
@@ -139,7 +118,7 @@
               // Layer 4
               << ConvolutionLayer(
                   3U, 3U, 384U,
-                  get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv4_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv4_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv4_b.npy"),
                   PadStrideInfo(1, 1, 1, 1), 2)
               .set_name("conv4")
@@ -147,7 +126,7 @@
               // Layer 5
               << ConvolutionLayer(
                   3U, 3U, 256U,
-                  get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv5_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv5_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv5_b.npy"),
                   PadStrideInfo(1, 1, 1, 1), 2)
               .set_name("conv5")
@@ -156,31 +135,36 @@
               // Layer 6
               << FullyConnectedLayer(
                   4096U,
-                  get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc6_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc6_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc6_b.npy"))
               .set_name("fc6")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu6")
               // Layer 7
               << FullyConnectedLayer(
                   4096U,
-                  get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc7_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc7_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc7_b.npy"))
               .set_name("fc7")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu7")
               // Layer 8
               << FullyConnectedLayer(
                   1000U,
-                  get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc8_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc8_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc8_b.npy"))
               .set_name("fc8")
               // Softmax
               << SoftmaxLayer().set_name("prob")
-              << OutputLayer(get_output_accessor(label, 5));
+              << OutputLayer(get_output_accessor(common_params, 5));
 
         // Finalize graph
         GraphConfig config;
-        config.use_tuner = (target == 2);
-        graph.finalize(target_hint, config);
+        config.num_threads = common_params.threads;
+        config.use_tuner   = common_params.enable_tuner;
+        config.tuner_file  = common_params.tuner_file;
+
+        graph.finalize(common_params.target, config);
+
+        return true;
     }
     void do_run() override
     {
@@ -189,13 +173,20 @@
     }
 
 private:
-    Stream graph{ 0, "AlexNet" };
+    CommandLineParser  cmd_parser;
+    CommonGraphOptions common_opts;
+    CommonGraphParams  common_params;
+    Stream             graph;
 };
 
 /** Main program for AlexNet
  *
+ * @note To list all the possible arguments execute the binary appended with the --help option
+ *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments
+ *
+ * @return Return code
  */
 int main(int argc, char **argv)
 {
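
The constructor/do_setup split above is the template every graph example now follows: parse once, consume the common parameters, print help and return false if asked, then build the stream from common_params. The input descriptor is where the layout handling happens: network shapes stay written in NCHW and are permuted to the layout requested on the command line. A sketch of that permutation, to be compiled against the library headers; the (3, 227, 227, 1) result assumes permute_shape keeps the batch dimension fixed, which is how the NHWC examples behave:

    #include "arm_compute/graph.h"
    #include "utils/GraphUtils.h"

    using namespace arm_compute;
    using namespace arm_compute::graph::frontend;
    using namespace arm_compute::graph_utils;

    TensorDescriptor make_nhwc_input()
    {
        // NCHW shape exactly as written in the example: dimension 0 is W.
        const TensorShape nchw(227U, 227U, 3U, 1U); // W=227, H=227, C=3, N=1

        // Permuting to NHWC makes channels innermost: (3, 227, 227, 1).
        const TensorShape nhwc = permute_shape(nchw, DataLayout::NCHW, DataLayout::NHWC);

        // The descriptor carries the permuted shape plus a layout tag, so the
        // input accessor knows how to fill the tensor.
        return TensorDescriptor(nhwc, DataType::F32).set_layout(DataLayout::NHWC);
    }

GraphConfig now also picks up the thread count and tuner file from the same common parameters, so NEON threading and the CL tuner are driven from the command line rather than from hard-coded positional arguments.
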
diff --git a/examples/graph_googlenet.cpp b/examples/graph_googlenet.cpp
index 2dba67f..cdbb8d8 100644
--- a/examples/graph_googlenet.cpp
+++ b/examples/graph_googlenet.cpp
@@ -23,12 +23,10 @@
  */
 #include "arm_compute/graph.h"
 #include "support/ToolchainSupport.h"
+#include "utils/CommonGraphOptions.h"
 #include "utils/GraphUtils.h"
 #include "utils/Utils.h"
 
-#include <cstdlib>
-#include <tuple>
-
 using namespace arm_compute::utils;
 using namespace arm_compute::graph::frontend;
 using namespace arm_compute::graph_utils;
@@ -36,74 +34,57 @@
 /** Example demonstrating how to implement Googlenet's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments
  */
 class GraphGooglenetExample : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    GraphGooglenetExample()
+        : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "GoogleNet")
     {
-        std::string data_path; /* Path to the trainable data */
-        std::string image;     /* Image data */
-        std::string label;     /* Label data */
+    }
+    bool do_setup(int argc, char **argv) override
+    {
+        // Parse arguments
+        cmd_parser.parse(argc, argv);
+
+        // Consume common parameters
+        common_params = consume_common_graph_parameters(common_opts);
+
+        // Return when help menu is requested
+        if(common_params.help)
+        {
+            cmd_parser.print_help(argv[0]);
+            return false;
+        }
+
+        // Checks
+        ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph");
+        ARM_COMPUTE_EXIT_ON_MSG(common_params.data_type == DataType::F16 && common_params.target == Target::NEON, "F16 NEON not supported for this graph");
+
+        // Print parameter values
+        std::cout << common_params << std::endl;
+
+        // Get trainable parameters data path
+        std::string data_path = common_params.data_path;
 
         // Create a preprocessor object
         const std::array<float, 3> mean_rgb{ { 122.68f, 116.67f, 104.01f } };
         std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb);
 
-        // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
-        const int    target         = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
-        Target       target_hint    = set_target_hint(target);
-        FastMathHint fast_math_hint = FastMathHint::DISABLED;
+        // Create input descriptor
+        const TensorShape tensor_shape     = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, common_params.data_layout);
+        TensorDescriptor  input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout);
 
-        // Parse arguments
-        if(argc < 2)
-        {
-            // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 2)
-        {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 3)
-        {
-            data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No image provided: using random values\n\n";
-        }
-        else if(argc == 4)
-        {
-            data_path = argv[2];
-            image     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
-            std::cout << "No text file with labels provided: skipping output accessor\n\n";
-        }
-        else if(argc == 5)
-        {
-            data_path = argv[2];
-            image     = argv[3];
-            label     = argv[4];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
-            std::cout << "No fast math info provided: disabling fast math\n\n";
-        }
-        else
-        {
-            data_path      = argv[2];
-            image          = argv[3];
-            label          = argv[4];
-            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
-        }
+        // Set weights trained layout
+        const DataLayout weights_layout = DataLayout::NCHW;
 
-        graph << target_hint
-              << fast_math_hint
-              << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32),
-                            get_input_accessor(image, std::move(preprocessor)))
+        graph << common_params.target
+              << common_params.fast_math_hint
+              << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor)))
               << ConvolutionLayer(
                   7U, 7U, 64U,
-                  get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv1/conv1_7x7_s2_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv1/conv1_7x7_s2_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv1/conv1_7x7_s2_b.npy"),
                   PadStrideInfo(2, 2, 3, 3))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
@@ -111,41 +92,46 @@
               << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f))
               << ConvolutionLayer(
                   1U, 1U, 64U,
-                  get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_b.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
               << ConvolutionLayer(
                   3U, 3U, 192U,
-                  get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
               << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f))
               << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)));
-        graph << get_inception_node(data_path, "inception_3a", 64, std::make_tuple(96U, 128U), std::make_tuple(16U, 32U), 32U);
-        graph << get_inception_node(data_path, "inception_3b", 128, std::make_tuple(128U, 192U), std::make_tuple(32U, 96U), 64U);
+        graph << get_inception_node(data_path, "inception_3a", weights_layout, 64, std::make_tuple(96U, 128U), std::make_tuple(16U, 32U), 32U);
+        graph << get_inception_node(data_path, "inception_3b", weights_layout, 128, std::make_tuple(128U, 192U), std::make_tuple(32U, 96U), 64U);
         graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)));
-        graph << get_inception_node(data_path, "inception_4a", 192, std::make_tuple(96U, 208U), std::make_tuple(16U, 48U), 64U);
-        graph << get_inception_node(data_path, "inception_4b", 160, std::make_tuple(112U, 224U), std::make_tuple(24U, 64U), 64U);
-        graph << get_inception_node(data_path, "inception_4c", 128, std::make_tuple(128U, 256U), std::make_tuple(24U, 64U), 64U);
-        graph << get_inception_node(data_path, "inception_4d", 112, std::make_tuple(144U, 288U), std::make_tuple(32U, 64U), 64U);
-        graph << get_inception_node(data_path, "inception_4e", 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U);
+        graph << get_inception_node(data_path, "inception_4a", weights_layout, 192, std::make_tuple(96U, 208U), std::make_tuple(16U, 48U), 64U);
+        graph << get_inception_node(data_path, "inception_4b", weights_layout, 160, std::make_tuple(112U, 224U), std::make_tuple(24U, 64U), 64U);
+        graph << get_inception_node(data_path, "inception_4c", weights_layout, 128, std::make_tuple(128U, 256U), std::make_tuple(24U, 64U), 64U);
+        graph << get_inception_node(data_path, "inception_4d", weights_layout, 112, std::make_tuple(144U, 288U), std::make_tuple(32U, 64U), 64U);
+        graph << get_inception_node(data_path, "inception_4e", weights_layout, 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U);
         graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)));
-        graph << get_inception_node(data_path, "inception_5a", 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U);
-        graph << get_inception_node(data_path, "inception_5b", 384, std::make_tuple(192U, 384U), std::make_tuple(48U, 128U), 128U);
+        graph << get_inception_node(data_path, "inception_5a", weights_layout, 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U);
+        graph << get_inception_node(data_path, "inception_5b", weights_layout, 384, std::make_tuple(192U, 384U), std::make_tuple(48U, 128U), 128U);
         graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 7, PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL)))
               << FullyConnectedLayer(
                   1000U,
-                  get_weights_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_b.npy"))
               << SoftmaxLayer()
-              << OutputLayer(get_output_accessor(label, 5));
+              << OutputLayer(get_output_accessor(common_params, 5));
 
         // Finalize graph
         GraphConfig config;
-        config.use_tuner = (target == 2);
-        graph.finalize(target_hint, config);
+        config.num_threads = common_params.threads;
+        config.use_tuner   = common_params.enable_tuner;
+        config.tuner_file  = common_params.tuner_file;
+
+        graph.finalize(common_params.target, config);
+
+        return true;
     }
     void do_run() override
     {
@@ -154,9 +140,12 @@
     }
 
 private:
-    Stream graph{ 0, "GoogleNet" };
+    CommandLineParser  cmd_parser;
+    CommonGraphOptions common_opts;
+    CommonGraphParams  common_params;
+    Stream             graph;
 
-    BranchLayer get_inception_node(const std::string &data_path, std::string &&param_path,
+    BranchLayer get_inception_node(const std::string &data_path, std::string &&param_path, DataLayout weights_layout,
                                    unsigned int a_filt,
                                    std::tuple<unsigned int, unsigned int> b_filters,
                                    std::tuple<unsigned int, unsigned int> c_filters,
@@ -166,7 +155,7 @@
         SubStream   i_a(graph);
         i_a << ConvolutionLayer(
                 1U, 1U, a_filt,
-                get_weights_accessor(data_path, total_path + "1x1_w.npy"),
+                get_weights_accessor(data_path, total_path + "1x1_w.npy", weights_layout),
                 get_weights_accessor(data_path, total_path + "1x1_b.npy"),
                 PadStrideInfo(1, 1, 0, 0))
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
@@ -174,13 +163,13 @@
         SubStream i_b(graph);
         i_b << ConvolutionLayer(
                 1U, 1U, std::get<0>(b_filters),
-                get_weights_accessor(data_path, total_path + "3x3_reduce_w.npy"),
+                get_weights_accessor(data_path, total_path + "3x3_reduce_w.npy", weights_layout),
                 get_weights_accessor(data_path, total_path + "3x3_reduce_b.npy"),
                 PadStrideInfo(1, 1, 0, 0))
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(
                 3U, 3U, std::get<1>(b_filters),
-                get_weights_accessor(data_path, total_path + "3x3_w.npy"),
+                get_weights_accessor(data_path, total_path + "3x3_w.npy", weights_layout),
                 get_weights_accessor(data_path, total_path + "3x3_b.npy"),
                 PadStrideInfo(1, 1, 1, 1))
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
@@ -188,13 +177,13 @@
         SubStream i_c(graph);
         i_c << ConvolutionLayer(
                 1U, 1U, std::get<0>(c_filters),
-                get_weights_accessor(data_path, total_path + "5x5_reduce_w.npy"),
+                get_weights_accessor(data_path, total_path + "5x5_reduce_w.npy", weights_layout),
                 get_weights_accessor(data_path, total_path + "5x5_reduce_b.npy"),
                 PadStrideInfo(1, 1, 0, 0))
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(
                 5U, 5U, std::get<1>(c_filters),
-                get_weights_accessor(data_path, total_path + "5x5_w.npy"),
+                get_weights_accessor(data_path, total_path + "5x5_w.npy", weights_layout),
                 get_weights_accessor(data_path, total_path + "5x5_b.npy"),
                 PadStrideInfo(1, 1, 2, 2))
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
@@ -203,7 +192,7 @@
         i_d << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)))
             << ConvolutionLayer(
                 1U, 1U, d_filt,
-                get_weights_accessor(data_path, total_path + "pool_proj_w.npy"),
+                get_weights_accessor(data_path, total_path + "pool_proj_w.npy", weights_layout),
                 get_weights_accessor(data_path, total_path + "pool_proj_b.npy"),
                 PadStrideInfo(1, 1, 0, 0))
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
@@ -214,8 +203,10 @@
 
 /** Main program for Googlenet
  *
+ * @note To list all the possible arguments execute the binary appended with the --help option
+ *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments
  */
 int main(int argc, char **argv)
 {
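
The weights_layout argument threaded through every get_weights_accessor call serves the same layout split: the .npy weight files were recorded in NCHW, and the accessor has to know that in order to permute them when the graph itself runs NHWC. Paraphrasing the declaration as these examples use it (utils/GraphUtils.h holds the authoritative signature):

    // Paraphrased declaration, not copied verbatim from the header. The
    // trailing parameter names the layout the weights file was recorded in;
    // a default of NCHW keeps the bias accessors, which pass no layout,
    // source-compatible with the old two-argument calls.
    std::unique_ptr<arm_compute::graph::ITensorAccessor>
    get_weights_accessor(const std::string &path, const std::string &data_file,
                         arm_compute::DataLayout file_layout = arm_compute::DataLayout::NCHW);
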
diff --git a/examples/graph_inception_v3.cpp b/examples/graph_inception_v3.cpp
index d1d6ab4..168a506 100644
--- a/examples/graph_inception_v3.cpp
+++ b/examples/graph_inception_v3.cpp
@@ -23,12 +23,10 @@
  */
 #include "arm_compute/graph.h"
 #include "support/ToolchainSupport.h"
+#include "utils/CommonGraphOptions.h"
 #include "utils/GraphUtils.h"
 #include "utils/Utils.h"
 
-#include <cstdlib>
-#include <tuple>
-
 using namespace arm_compute::utils;
 using namespace arm_compute::graph::frontend;
 using namespace arm_compute::graph_utils;
@@ -36,72 +34,61 @@
 /** Example demonstrating how to implement InceptionV3's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments
  */
 class InceptionV3Example : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    InceptionV3Example()
+        : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "InceptionV3")
     {
-        std::string data_path; /* Path to the trainable data */
-        std::string image;     /* Image data */
-        std::string label;     /* Label data */
+    }
+    bool do_setup(int argc, char **argv) override
+    {
+        // Parse arguments
+        cmd_parser.parse(argc, argv);
+
+        // Consume common parameters
+        common_params = consume_common_graph_parameters(common_opts);
+
+        // Return when help menu is requested
+        if(common_params.help)
+        {
+            cmd_parser.print_help(argv[0]);
+            return false;
+        }
+
+        // Set default layout if needed
+        if(!common_opts.data_layout->is_set() && common_params.target == Target::NEON)
+        {
+            common_params.data_layout = DataLayout::NCHW;
+        }
+
+        // Checks
+        ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph");
+        ARM_COMPUTE_EXIT_ON_MSG(common_params.data_type == DataType::F16 && common_params.target == Target::NEON, "F16 NEON not supported for this graph");
+
+        // Print parameter values
+        std::cout << common_params << std::endl;
+
+        // Get trainable parameters data path
+        std::string data_path = common_params.data_path;
 
         // Create a preprocessor object
         std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>();
 
-        // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
-        const int    target         = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
-        Target       target_hint    = set_target_hint(target);
-        FastMathHint fast_math_hint = FastMathHint::DISABLED;
+        // Create input descriptor
+        const TensorShape tensor_shape     = permute_shape(TensorShape(299U, 299U, 3U, 1U), DataLayout::NCHW, common_params.data_layout);
+        TensorDescriptor  input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout);
 
-        // Parse arguments
-        if(argc < 2)
-        {
-            // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 2)
-        {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 3)
-        {
-            data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No image provided: using random values\n\n";
-        }
-        else if(argc == 4)
-        {
-            data_path = argv[2];
-            image     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
-            std::cout << "No text file with labels provided: skipping output accessor\n\n";
-        }
-        else if(argc == 5)
-        {
-            data_path = argv[2];
-            image     = argv[3];
-            label     = argv[4];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
-            std::cout << "No fast math info provided: disabling fast math\n\n";
-        }
-        else
-        {
-            data_path      = argv[2];
-            image          = argv[3];
-            label          = argv[4];
-            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
-        }
+        // Set weights trained layout
+        const DataLayout weights_layout = DataLayout::NCHW;
 
-        graph << target_hint
-              << fast_math_hint
-              << InputLayer(TensorDescriptor(TensorShape(299U, 299U, 3U, 1U), DataType::F32),
-                            get_input_accessor(image, std::move(preprocessor), false))
+        graph << common_params.target
+              << common_params.fast_math_hint
+              << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false))
               << ConvolutionLayer(3U, 3U, 32U,
-                                  get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_weights.npy"),
+                                  get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_weights.npy", weights_layout),
                                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
               .set_name("Conv2d_1a_3x3/convolution")
               << BatchNormalizationLayer(get_weights_accessor(data_path,
@@ -114,7 +101,7 @@
               .set_name("Conv2d_1a_3x3/BatchNorm/batchnorm")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_1a_3x3/Relu")
               << ConvolutionLayer(3U, 3U, 32U,
-                                  get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_weights.npy"),
+                                  get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_weights.npy", weights_layout),
                                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
               .set_name("Conv2d_2a_3x3/convolution")
               << BatchNormalizationLayer(get_weights_accessor(data_path,
@@ -128,7 +115,7 @@
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2a_3x3/Relu")
 
               << ConvolutionLayer(3U, 3U, 64U,
-                                  get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_weights.npy"),
+                                  get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_weights.npy", weights_layout),
                                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1))
               .set_name("Conv2d_2b_3x3/convolution")
               << BatchNormalizationLayer(get_weights_accessor(data_path,
@@ -144,7 +131,7 @@
               << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("MaxPool_3a_3x3/MaxPool")
 
               << ConvolutionLayer(1U, 1U, 80U,
-                                  get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_weights.npy"),
+                                  get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_weights.npy", weights_layout),
                                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
               .set_name("Conv2d_3b_1x1/convolution")
               << BatchNormalizationLayer(get_weights_accessor(data_path,
@@ -158,7 +145,7 @@
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_3b_1x1/Relu")
 
               << ConvolutionLayer(3U, 3U, 192U,
-                                  get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_weights.npy"),
+                                  get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_weights.npy", weights_layout),
                                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
               .set_name("Conv2d_4a_3x3/convolution")
               << BatchNormalizationLayer(get_weights_accessor(data_path,
@@ -173,57 +160,62 @@
 
               << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("MaxPool_5a_3x3/MaxPool");
 
-        graph << get_inception_node_A(data_path, "Mixed_5b", 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
+        graph << get_inception_node_A(data_path, "Mixed_5b", weights_layout, 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
                                       32U)
               .set_name("Mixed_5b/concat");
-        graph << get_inception_node_A(data_path, "Mixed_5c", 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
+        graph << get_inception_node_A(data_path, "Mixed_5c", weights_layout, 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
                                       64U, true)
               .set_name("Mixed_5c/concat");
-        graph << get_inception_node_A(data_path, "Mixed_5d", 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
+        graph << get_inception_node_A(data_path, "Mixed_5d", weights_layout, 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U),
                                       64U)
               .set_name("Mixed_5d/concat");
 
-        graph << get_inception_node_B(data_path, "Mixed_6a", 384U, std::make_tuple(64U, 96U, 96U)).set_name("Mixed_6a/concat");
+        graph << get_inception_node_B(data_path, "Mixed_6a", weights_layout, 384U, std::make_tuple(64U, 96U, 96U)).set_name("Mixed_6a/concat");
 
-        graph << get_inception_node_C(data_path, "Mixed_6b", 192U, std::make_tuple(128U, 128U, 192U),
+        graph << get_inception_node_C(data_path, "Mixed_6b", weights_layout, 192U, std::make_tuple(128U, 128U, 192U),
                                       std::make_tuple(128U, 128U, 128U, 128U, 192U), 192U)
               .set_name("Mixed_6b/concat");
-        graph << get_inception_node_C(data_path, "Mixed_6c", 192U, std::make_tuple(160U, 160U, 192U),
+        graph << get_inception_node_C(data_path, "Mixed_6c", weights_layout, 192U, std::make_tuple(160U, 160U, 192U),
                                       std::make_tuple(160U, 160U, 160U, 160U, 192U), 192U)
               .set_name("Mixed_6c/concat");
-        graph << get_inception_node_C(data_path, "Mixed_6d", 192U, std::make_tuple(160U, 160U, 192U),
+        graph << get_inception_node_C(data_path, "Mixed_6d", weights_layout, 192U, std::make_tuple(160U, 160U, 192U),
                                       std::make_tuple(160U, 160U, 160U, 160U, 192U), 192U)
               .set_name("Mixed_6d/concat");
-        graph << get_inception_node_C(data_path, "Mixed_6e", 192U, std::make_tuple(192U, 192U, 192U),
+        graph << get_inception_node_C(data_path, "Mixed_6e", weights_layout, 192U, std::make_tuple(192U, 192U, 192U),
                                       std::make_tuple(192U, 192U, 192U, 192U, 192U), 192U)
               .set_name("Mixed_6e/concat");
 
-        graph << get_inception_node_D(data_path, "Mixed_7a", std::make_tuple(192U, 320U),
+        graph << get_inception_node_D(data_path, "Mixed_7a", weights_layout, std::make_tuple(192U, 320U),
                                       std::make_tuple(192U, 192U, 192U, 192U))
               .set_name("Mixed_7a/concat");
 
-        graph << get_inception_node_E(data_path, "Mixed_7b", 320U, std::make_tuple(384U, 384U, 384U),
+        graph << get_inception_node_E(data_path, "Mixed_7b", weights_layout, 320U, std::make_tuple(384U, 384U, 384U),
                                       std::make_tuple(448U, 384U, 384U, 384U), 192U)
               .set_name("Mixed_7b/concat");
-        graph << get_inception_node_E(data_path, "Mixed_7c", 320U, std::make_tuple(384U, 384U, 384U),
+        graph << get_inception_node_E(data_path, "Mixed_7c", weights_layout, 320U, std::make_tuple(384U, 384U, 384U),
                                       std::make_tuple(448U, 384U, 384U, 384U), 192U, true)
               .set_name("Mixed_7c/concat");
 
         graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 8, PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL))).set_name("Logits/AvgPool_1a_8x8/AvgPool")
               << ConvolutionLayer(1U, 1U, 1001U, get_weights_accessor(data_path,
-                                                                      "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_weights.npy"),
+                                                                      "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_weights.npy", weights_layout),
                                   get_weights_accessor(data_path,
                                                        "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_biases.npy"),
                                   PadStrideInfo(1, 1, 0, 0))
               .set_name("Logits/Conv2d_1c_1x1/convolution")
               << ReshapeLayer(TensorShape(1001U)).set_name("Predictions/Reshape")
               << SoftmaxLayer().set_name("Predictions/Softmax")
-              << OutputLayer(get_output_accessor(label, 5));
+              << OutputLayer(get_output_accessor(common_params, 5));
 
         // Finalize graph
         GraphConfig config;
-        config.use_tuner = (target == 2);
-        graph.finalize(target_hint, config);
+        config.num_threads = common_params.threads;
+        config.use_tuner   = common_params.enable_tuner;
+        config.tuner_file  = common_params.tuner_file;
+
+        graph.finalize(common_params.target, config);
+
+        return true;
     }
 
     void do_run() override
@@ -232,10 +224,13 @@
     }
 
 private:
-    Stream graph{ 0, "InceptionV3" };
+    CommandLineParser  cmd_parser;
+    CommonGraphOptions common_opts;
+    CommonGraphParams  common_params;
+    Stream             graph;
 
 private:
-    BranchLayer get_inception_node_A(const std::string &data_path, std::string &&param_path,
+    BranchLayer get_inception_node_A(const std::string &data_path, std::string &&param_path, DataLayout weights_layout,
                                      unsigned int a_filt,
                                      std::tuple<unsigned int, unsigned int> b_filters,
                                      std::tuple<unsigned int, unsigned int, unsigned int> c_filters,
@@ -256,7 +251,7 @@
         SubStream i_a(graph);
         i_a << ConvolutionLayer(
                 1U, 1U, a_filt,
-                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution")
@@ -272,7 +267,7 @@
         SubStream i_b(graph);
         i_b << ConvolutionLayer(
                 1U, 1U, std::get<0>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/convolution")
@@ -286,7 +281,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/Relu")
             << ConvolutionLayer(
                 5U, 5U, std::get<1>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 2, 2))
             .set_name(param_path + "/Branch_1/Conv2d" + conv_id1 + "5x5/convolution")
@@ -302,7 +297,7 @@
         SubStream i_c(graph);
         i_c << ConvolutionLayer(
                 1U, 1U, std::get<0>(c_filters),
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution")
@@ -316,7 +311,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu")
             << ConvolutionLayer(
                 3U, 3U, std::get<1>(c_filters),
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 1, 1))
             .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/convolution")
@@ -330,7 +325,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Relu")
             << ConvolutionLayer(
                 3U, 3U, std::get<2>(c_filters),
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 1, 1))
             .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/convolution")
@@ -347,7 +342,7 @@
         i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)).set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool")
             << ConvolutionLayer(
                 1U, 1U, d_filt,
-                get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution")
@@ -363,7 +358,7 @@
         return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d));
     }
 
-    BranchLayer get_inception_node_B(const std::string &data_path, std::string &&param_path,
+    BranchLayer get_inception_node_B(const std::string &data_path, std::string &&param_path, DataLayout weights_layout,
                                      unsigned int a_filt,
                                      std::tuple<unsigned int, unsigned int, unsigned int> b_filters)
     {
@@ -371,7 +366,7 @@
         SubStream   i_a(graph);
         i_a << ConvolutionLayer(
                 3U, 3U, a_filt,
-                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(2, 2, 0, 0))
             .set_name(param_path + "/Branch_0/Conv2d_1a_1x1/convolution")
@@ -387,7 +382,7 @@
         SubStream i_b(graph);
         i_b << ConvolutionLayer(
                 1U, 1U, std::get<0>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution")
@@ -401,7 +396,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu")
             << ConvolutionLayer(
                 3U, 3U, std::get<1>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 1, 1))
             .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/convolution")
@@ -415,7 +410,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_3x3/Relu")
             << ConvolutionLayer(
                 3U, 3U, std::get<2>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(2, 2, 0, 0))
             .set_name(param_path + "/Branch_1/Conv2d_1a_1x1/convolution")
@@ -434,7 +429,7 @@
         return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c));
     }
 
-    BranchLayer get_inception_node_C(const std::string &data_path, std::string &&param_path,
+    BranchLayer get_inception_node_C(const std::string &data_path, std::string &&param_path, DataLayout weights_layout,
                                      unsigned int a_filt,
                                      std::tuple<unsigned int, unsigned int, unsigned int> b_filters,
                                      std::tuple<unsigned int, unsigned int, unsigned int, unsigned int, unsigned int> c_filters,
@@ -444,7 +439,7 @@
         SubStream   i_a(graph);
         i_a << ConvolutionLayer(
                 1U, 1U, a_filt,
-                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution")
@@ -460,7 +455,7 @@
         SubStream i_b(graph);
         i_b << ConvolutionLayer(
                 1U, 1U, std::get<0>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution")
@@ -474,7 +469,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu")
             << ConvolutionLayer(
                 7U, 1U, std::get<1>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 3, 0))
             .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/convolution")
@@ -488,7 +483,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu")
             << ConvolutionLayer(
                 1U, 7U, std::get<2>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 3))
             .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/convolution")
@@ -504,7 +499,7 @@
         SubStream i_c(graph);
         i_c << ConvolutionLayer(
                 1U, 1U, std::get<0>(c_filters),
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution")
@@ -518,7 +513,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu")
             << ConvolutionLayer(
                 1U, 7U, std::get<1>(c_filters),
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 3))
             .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/convolution")
@@ -532,7 +527,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_7x1/Relu")
             << ConvolutionLayer(
                 7U, 1U, std::get<2>(c_filters),
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 3, 0))
             .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/convolution")
@@ -546,7 +541,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_1x7/Relu")
             << ConvolutionLayer(
                 1U, 7U, std::get<3>(c_filters),
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 3))
             .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/convolution")
@@ -560,7 +555,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0d_7x1/Relu")
             << ConvolutionLayer(
                 7U, 1U, std::get<4>(c_filters),
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 3, 0))
             .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/convolution")
@@ -577,7 +572,7 @@
         i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)).set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool")
             << ConvolutionLayer(
                 1U, 1U, d_filt,
-                get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution")
@@ -593,15 +588,15 @@
         return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d));
     }
 
-    BranchLayer get_inception_node_D(const std::string &data_path, std::string &&param_path,
-                                     std::tuple<unsigned int, unsigned int>      a_filters,
+    BranchLayer get_inception_node_D(const std::string &data_path, std::string &&param_path, DataLayout weights_layout,
+                                     std::tuple<unsigned int, unsigned int> a_filters,
                                      std::tuple<unsigned int, unsigned int, unsigned int, unsigned int> b_filters)
     {
         std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_";
         SubStream   i_a(graph);
         i_a << ConvolutionLayer(
                 1U, 1U, std::get<0>(a_filters),
-                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution")
@@ -615,7 +610,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu")
             << ConvolutionLayer(
                 3U, 3U, std::get<1>(a_filters),
-                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(2, 2, 0, 0))
             .set_name(param_path + "/Branch_0/Conv2d_1a_3x3/convolution")
@@ -631,7 +626,7 @@
         SubStream i_b(graph);
         i_b << ConvolutionLayer(
                 1U, 1U, std::get<0>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution")
@@ -645,7 +640,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu")
             << ConvolutionLayer(
                 7U, 1U, std::get<1>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 3, 0))
             .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/convolution")
@@ -659,7 +654,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu")
             << ConvolutionLayer(
                 1U, 7U, std::get<2>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 3))
             .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/convolution")
@@ -673,7 +668,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0c_7x1/Relu")
             << ConvolutionLayer(
                 3U, 3U, std::get<3>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(2, 2, 0, 0))
             .set_name(param_path + "/Branch_1/Conv2d_1a_3x3/convolution")
@@ -692,7 +687,7 @@
         return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c));
     }
 
-    BranchLayer get_inception_node_E(const std::string &data_path, std::string &&param_path,
+    BranchLayer get_inception_node_E(const std::string &data_path, std::string &&param_path, DataLayout weights_layout,
                                      unsigned int a_filt,
                                      std::tuple<unsigned int, unsigned int, unsigned int> b_filters,
                                      std::tuple<unsigned int, unsigned int, unsigned int, unsigned int> c_filters,
@@ -710,7 +705,7 @@
         SubStream   i_a(graph);
         i_a << ConvolutionLayer(
                 1U, 1U, a_filt,
-                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution")
@@ -726,7 +721,7 @@
         SubStream i_b(graph);
         i_b << ConvolutionLayer(
                 1U, 1U, std::get<0>(b_filters),
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution")
@@ -739,10 +734,10 @@
             .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm")
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu");
 
-        SubStream i_b1(static_cast<IStream &>(i_b));
+        SubStream i_b1(i_b);
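+        // SubStream can now be constructed from the parent stream directly; the
+        // explicit IStream cast is no longer needed.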
         i_b1 << ConvolutionLayer(
                  3U, 1U, std::get<1>(b_filters),
-                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy"),
+                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy", weights_layout),
                  std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                  PadStrideInfo(1, 1, 1, 0))
              .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/convolution")
@@ -755,10 +750,10 @@
              .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/BatchNorm/batchnorm")
              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x3/Relu");
 
-        SubStream i_b2(static_cast<IStream &>(i_b));
+        SubStream i_b2(i_b);
         i_b2 << ConvolutionLayer(
                  1U, 3U, std::get<2>(b_filters),
-                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_weights.npy"),
+                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_weights.npy", weights_layout),
                  std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                  PadStrideInfo(1, 1, 0, 1))
              .set_name(param_path + "/Branch_1/Conv2d" + conv_id + "3x1/convolution")
@@ -777,7 +772,7 @@
         SubStream i_c(graph);
         i_c << ConvolutionLayer(
                 1U, 1U, std::get<0>(c_filters),
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution")
@@ -791,7 +786,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu")
             << ConvolutionLayer(
                 3U, 3U, std::get<1>(c_filters),
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 1, 1))
             .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/convolution")
@@ -804,10 +799,10 @@
             .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/BatchNorm/batchnorm")
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Relu");
 
-        SubStream i_c1(static_cast<IStream &>(i_c));
+        SubStream i_c1(i_c);
         i_c1 << ConvolutionLayer(
                  3U, 1U, std::get<2>(c_filters),
-                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy"),
+                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy", weights_layout),
                  std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                  PadStrideInfo(1, 1, 1, 0))
              .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/convolution")
@@ -820,10 +815,10 @@
              .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/BatchNorm/batchnorm")
              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_1x3/Relu");
 
-        SubStream i_c2(static_cast<IStream &>(i_c));
+        SubStream i_c2(i_c);
         i_c2 << ConvolutionLayer(
                  1U, 3U, std::get<3>(c_filters),
-                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_weights.npy"),
+                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_weights.npy", weights_layout),
                  std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                  PadStrideInfo(1, 1, 0, 1))
              .set_name(param_path + "/Branch_2/Conv2d_0d_3x1/convolution")
@@ -843,7 +838,7 @@
         i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)).set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool")
             << ConvolutionLayer(
                 1U, 1U, d_filt,
-                get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution")
@@ -862,8 +857,10 @@
 
 /** Main program for Inception V3
  *
+ * @note To list all the possible arguments, run the binary with the --help option
+ *
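+ * Example invocation (option names as defined in utils/CommonGraphOptions.h):
+ *   graph_inception_v3 --target=NEON --data=/path/to/cnn_data --image=img.ppm --labels=labels.txt
+ *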
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments
  */
 int main(int argc, char **argv)
 {
diff --git a/examples/graph_inception_v4.cpp b/examples/graph_inception_v4.cpp
index ed95baa..b6c28b4 100644
--- a/examples/graph_inception_v4.cpp
+++ b/examples/graph_inception_v4.cpp
@@ -23,12 +23,10 @@
  */
 #include "arm_compute/graph.h"
 #include "support/ToolchainSupport.h"
+#include "utils/CommonGraphOptions.h"
 #include "utils/GraphUtils.h"
 #include "utils/Utils.h"
 
-#include <cstdlib>
-#include <tuple>
-
 using namespace arm_compute::utils;
 using namespace arm_compute::graph::frontend;
 using namespace arm_compute::graph_utils;
@@ -36,76 +34,62 @@
 /** Example demonstrating how to implement InceptionV4's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments
  */
 class InceptionV4Example final : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    InceptionV4Example()
+        : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "InceptionV4")
     {
-        // Disabled the test for now because the process gets killed on Linux Firefly 32 bit even when using ConvolutionMethodHint::DIRECT.
-        // Needs to review/rework to run the code below.
-#if __aarch64__
-        std::string data_path; /* Path to the trainable data */
-        std::string image;     /* Image data */
-        std::string label;     /* Label data */
+    }
+    bool do_setup(int argc, char **argv) override
+    {
+        // Parse arguments
+        cmd_parser.parse(argc, argv);
+
+        // Consume common parameters
+        common_params = consume_common_graph_parameters(common_opts);
+
+        // Return early when the help menu is requested
+        if(common_params.help)
+        {
+            cmd_parser.print_help(argv[0]);
+            return false;
+        }
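+        // Note: returning false above makes the framework skip do_run().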
+
+        // Set default layout if needed
+        if(!common_opts.data_layout->is_set() && common_params.target == Target::NEON)
+        {
+            common_params.data_layout = DataLayout::NCHW;
+        }
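+        // Without an explicit --layout, NEON runs NCHW; other targets keep the parsed default.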
+
+        // Checks
+        ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph");
+        ARM_COMPUTE_EXIT_ON_MSG(common_params.data_type == DataType::F16 && common_params.target == Target::NEON, "F16 NEON not supported for this graph");
+
+        // Print parameter values
+        std::cout << common_params << std::endl;
+
+        // Get trainable parameters data path
+        std::string data_path = common_params.data_path;
 
         // Create a preprocessor object
         std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>();
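+        // TFPreproccessor scales the input to the [-1, 1] range expected by the TF slim models.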
 
-        // Set target. 0 (NEON), 1 (OpenCL). By default it is NEON
-        const int    target         = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
-        Target       target_hint    = set_target_hint(target);
-        FastMathHint fast_math_hint = FastMathHint::DISABLED;
+        // Create input descriptor
+        const TensorShape tensor_shape     = permute_shape(TensorShape(299U, 299U, 3U, 1U), DataLayout::NCHW, common_params.data_layout);
+        TensorDescriptor  input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout);
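+        // permute_shape() re-orders the NCHW-described 299x299x3 shape for the active
+        // layout, e.g. 3x299x299 when running NHWC.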
 
-        // Parse arguments
-        if(argc < 2)
-        {
-            // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 2)
-        {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 3)
-        {
-            data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No image provided: using random values\n\n";
-        }
-        else if(argc == 4)
-        {
-            data_path = argv[2];
-            image     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
-            std::cout << "No text file with labels provided: skipping output accessor\n\n";
-        }
-        else if(argc == 5)
-        {
-            data_path = argv[2];
-            image     = argv[3];
-            label     = argv[4];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
-            std::cout << "No fast math info provided: disabling fast math\n\n";
-        }
-        else
-        {
-            data_path      = argv[2];
-            image          = argv[3];
-            label          = argv[4];
-            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
-        }
+        // Set the layout the weights were trained in
+        const DataLayout weights_layout = DataLayout::NCHW;
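+        // The .npy files ship in NCHW; the accessors permute them when the graph runs NHWC.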
 
-        graph << target_hint
-              << fast_math_hint
-              << InputLayer(TensorDescriptor(TensorShape(299U, 299U, 3U, 1U), DataType::F32),
-                            get_input_accessor(image, std::move(preprocessor), false))
+        graph << common_params.target
+              << common_params.fast_math_hint
+              << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false))
               // Conv2d_1a_3x3
               << ConvolutionLayer(3U, 3U, 32U,
-                                  get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_weights.npy"),
+                                  get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_weights.npy", weights_layout),
                                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
               << BatchNormalizationLayer(get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_BatchNorm_moving_mean.npy"),
                                          get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_BatchNorm_moving_variance.npy"),
@@ -115,7 +99,7 @@
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
               // Conv2d_2a_3x3
               << ConvolutionLayer(3U, 3U, 32U,
-                                  get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_weights.npy"),
+                                  get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_weights.npy", weights_layout),
                                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
               << BatchNormalizationLayer(get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_BatchNorm_moving_mean.npy"),
                                          get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_BatchNorm_moving_variance.npy"),
@@ -125,7 +109,7 @@
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
               // Conv2d_2b_3x3
               << ConvolutionLayer(3U, 3U, 64U,
-                                  get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_weights.npy"),
+                                  get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_weights.npy", weights_layout),
                                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1))
               << BatchNormalizationLayer(get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_BatchNorm_moving_mean.npy"),
                                          get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_BatchNorm_moving_variance.npy"),
@@ -134,62 +118,63 @@
                                          0.001f)
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
 
-        graph << get_mixed_3a(data_path);
-        graph << get_mixed_4a(data_path);
-        graph << get_mixed_5a(data_path);
+        graph << get_mixed_3a(data_path, weights_layout);
+        graph << get_mixed_4a(data_path, weights_layout);
+        graph << get_mixed_5a(data_path, weights_layout);
         // 4 inception A blocks
-        graph << get_inceptionA_block(data_path, "Mixed_5b");
-        graph << get_inceptionA_block(data_path, "Mixed_5c");
-        graph << get_inceptionA_block(data_path, "Mixed_5d");
-        graph << get_inceptionA_block(data_path, "Mixed_5e");
+        graph << get_inceptionA_block(data_path, weights_layout, "Mixed_5b");
+        graph << get_inceptionA_block(data_path, weights_layout, "Mixed_5c");
+        graph << get_inceptionA_block(data_path, weights_layout, "Mixed_5d");
+        graph << get_inceptionA_block(data_path, weights_layout, "Mixed_5e");
         // reduction A block
-        graph << get_reductionA_block(data_path);
+        graph << get_reductionA_block(data_path, weights_layout);
         // 7 inception B blocks
-        graph << get_inceptionB_block(data_path, "Mixed_6b");
-        graph << get_inceptionB_block(data_path, "Mixed_6c");
-        graph << get_inceptionB_block(data_path, "Mixed_6d");
-        graph << get_inceptionB_block(data_path, "Mixed_6e");
-        graph << get_inceptionB_block(data_path, "Mixed_6f");
-        graph << get_inceptionB_block(data_path, "Mixed_6g");
-        graph << get_inceptionB_block(data_path, "Mixed_6h");
+        graph << get_inceptionB_block(data_path, weights_layout, "Mixed_6b");
+        graph << get_inceptionB_block(data_path, weights_layout, "Mixed_6c");
+        graph << get_inceptionB_block(data_path, weights_layout, "Mixed_6d");
+        graph << get_inceptionB_block(data_path, weights_layout, "Mixed_6e");
+        graph << get_inceptionB_block(data_path, weights_layout, "Mixed_6f");
+        graph << get_inceptionB_block(data_path, weights_layout, "Mixed_6g");
+        graph << get_inceptionB_block(data_path, weights_layout, "Mixed_6h");
         // reduction B block
-        graph << get_reductionB_block(data_path);
+        graph << get_reductionB_block(data_path, weights_layout);
         // 3 inception C blocks
-        graph << get_inceptionC_block(data_path, "Mixed_7b");
-        graph << get_inceptionC_block(data_path, "Mixed_7c");
-        graph << get_inceptionC_block(data_path, "Mixed_7d");
+        graph << get_inceptionC_block(data_path, weights_layout, "Mixed_7b");
+        graph << get_inceptionC_block(data_path, weights_layout, "Mixed_7c");
+        graph << get_inceptionC_block(data_path, weights_layout, "Mixed_7d");
         graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG))
               << FlattenLayer()
               << FullyConnectedLayer(
                   1001U,
-                  get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Logits_Logits_weights.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Logits_Logits_weights.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Logits_Logits_biases.npy"))
               << SoftmaxLayer()
-              << OutputLayer(get_output_accessor(label, 5));
+              << OutputLayer(get_output_accessor(common_params, 5));
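+        // (top-5 predictions; class names resolved via --labels when provided)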
 
         // Finalize graph
         GraphConfig config;
-        config.use_tuner = (target == 2);
-        graph.finalize(target_hint, config);
-#else  /* __aarch64__ */
-        using namespace arm_compute;
-        ARM_COMPUTE_UNUSED(argc);
-        ARM_COMPUTE_UNUSED(argv);
-#endif /* __aarch64__ */
+        config.num_threads = common_params.threads;
+        config.use_tuner   = common_params.enable_tuner;
+        config.tuner_file  = common_params.tuner_file;
+
+        graph.finalize(common_params.target, config);
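+        // finalize() validates the graph, configures the backend kernels and allocates all tensors.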
+
+        return true;
     }
 
     void do_run() override
     {
-#if __aarch64__
         graph.run();
-#endif /* __aarch64__ */
     }
 
 private:
-    Stream graph{ 0, "InceptionV4" };
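+    // Shared command-line handling (see utils/CommonGraphOptions.h) replaces the ad-hoc argv parsing.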
+    CommandLineParser  cmd_parser;
+    CommonGraphOptions common_opts;
+    CommonGraphParams  common_params;
+    Stream             graph;
 
 private:
-    BranchLayer get_mixed_3a(const std::string &data_path)
+    BranchLayer get_mixed_3a(const std::string &data_path, DataLayout weights_layout)
     {
         std::string total_path = "/cnn_data/inceptionv4_model/Mixed_3a_";
 
@@ -198,7 +183,7 @@
 
         SubStream i_b(graph);
         i_b << ConvolutionLayer(3U, 3U, 96U,
-                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_BatchNorm_moving_variance.npy"),
@@ -210,13 +195,13 @@
         return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b));
     }
 
-    BranchLayer get_mixed_4a(const std::string &data_path)
+    BranchLayer get_mixed_4a(const std::string &data_path, DataLayout weights_layout)
     {
         std::string total_path = "/cnn_data/inceptionv4_model/Mixed_4a_";
 
         SubStream i_a(graph);
         i_a << ConvolutionLayer(1U, 1U, 64U,
-                                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -225,7 +210,7 @@
                                        0.001f)
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(3U, 3U, 96U,
-                                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"),
@@ -236,7 +221,7 @@
 
         SubStream i_b(graph);
         i_b << ConvolutionLayer(1U, 1U, 64U,
-                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -245,7 +230,7 @@
                                        0.001f)
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(7U, 1U, 64U,
-                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"),
@@ -254,7 +239,7 @@
                                        0.001f)
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(1U, 7U, 64U,
-                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"),
@@ -263,7 +248,7 @@
                                        0.001f)
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(3U, 3U, 96U,
-                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"),
@@ -275,13 +260,13 @@
         return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b));
     }
 
-    BranchLayer get_mixed_5a(const std::string &data_path)
+    BranchLayer get_mixed_5a(const std::string &data_path, DataLayout weights_layout)
     {
         std::string total_path = "/cnn_data/inceptionv4_model/Mixed_5a_";
 
         SubStream i_a(graph);
         i_a << ConvolutionLayer(3U, 3U, 192U,
-                                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"),
@@ -296,13 +281,13 @@
         return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b));
     }
 
-    BranchLayer get_inceptionA_block(const std::string &data_path, std::string &&param_path)
+    BranchLayer get_inceptionA_block(const std::string &data_path, DataLayout weights_layout, std::string &&param_path)
     {
         std::string total_path = "/cnn_data/inceptionv4_model/" + param_path + "_";
 
         SubStream i_a(graph);
         i_a << ConvolutionLayer(1U, 1U, 96U,
-                                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -313,7 +298,7 @@
 
         SubStream i_b(graph);
         i_b << ConvolutionLayer(1U, 1U, 64U,
-                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -322,7 +307,7 @@
                                        0.001f)
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(3U, 3U, 96U,
-                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"),
@@ -333,7 +318,7 @@
 
         SubStream i_c(graph);
         i_c << ConvolutionLayer(1U, 1U, 64U,
-                                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -342,7 +327,7 @@
                                        0.001f)
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(3U, 3U, 96U,
-                                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"),
@@ -351,7 +336,7 @@
                                        0.001f)
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(3U, 3U, 96U,
-                                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"),
@@ -363,7 +348,7 @@
         SubStream i_d(graph);
         i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true))
             << ConvolutionLayer(1U, 1U, 96U,
-                                get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"),
@@ -375,13 +360,13 @@
         return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d));
     }
 
-    BranchLayer get_reductionA_block(const std::string &data_path)
+    BranchLayer get_reductionA_block(const std::string &data_path, DataLayout weights_layout)
     {
         std::string total_path = "/cnn_data/inceptionv4_model/Mixed_6a_";
 
         SubStream i_a(graph);
         i_a << ConvolutionLayer(3U, 3U, 384U,
-                                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"),
@@ -392,7 +377,7 @@
 
         SubStream i_b(graph);
         i_b << ConvolutionLayer(1U, 1U, 192U,
-                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -401,7 +386,7 @@
                                        0.001f)
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(3U, 3U, 224U,
-                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"),
@@ -410,7 +395,7 @@
                                        0.001f)
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(3U, 3U, 256U,
-                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"),
@@ -425,13 +410,13 @@
         return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c));
     }
 
-    BranchLayer get_inceptionB_block(const std::string &data_path, std::string &&param_path)
+    BranchLayer get_inceptionB_block(const std::string &data_path, DataLayout weights_layout, std::string &&param_path)
     {
         std::string total_path = "/cnn_data/inceptionv4_model/" + param_path + "_";
 
         SubStream i_a(graph);
         i_a << ConvolutionLayer(1U, 1U, 384U,
-                                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -442,7 +427,7 @@
 
         SubStream i_b(graph);
         i_b << ConvolutionLayer(1U, 1U, 192U,
-                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -451,7 +436,7 @@
                                        0.001f)
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(7U, 1U, 224U,
-                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"),
@@ -460,7 +445,7 @@
                                        0.001f)
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(1U, 7U, 256U,
-                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"),
@@ -471,7 +456,7 @@
 
         SubStream i_c(graph);
         i_c << ConvolutionLayer(1U, 1U, 192U,
-                                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -480,7 +465,7 @@
                                        0.001f)
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(1U, 7U, 192U,
-                                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_variance.npy"),
@@ -489,7 +474,7 @@
                                        0.001f)
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(7U, 1U, 224U,
-                                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_variance.npy"),
@@ -498,7 +483,7 @@
                                        0.001f)
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(1U, 7U, 224U,
-                                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_variance.npy"),
@@ -507,7 +492,7 @@
                                        0.001f)
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(7U, 1U, 256U,
-                                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_variance.npy"),
@@ -519,7 +504,7 @@
         SubStream i_d(graph);
         i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true))
             << ConvolutionLayer(1U, 1U, 128U,
-                                get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"),
@@ -531,13 +516,13 @@
         return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d));
     }
 
-    BranchLayer get_reductionB_block(const std::string &data_path)
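+    // Note: the new weights_layout argument declares the data layout the .npy weight files were saved in, so the accessors can import them correctly whatever the execution layout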
+    BranchLayer get_reductionB_block(const std::string &data_path, DataLayout weights_layout)
     {
         std::string total_path = "/cnn_data/inceptionv4_model/Mixed_7a_";
 
         SubStream i_a(graph);
         i_a << ConvolutionLayer(1U, 1U, 192U,
-                                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -546,7 +531,7 @@
                                        0.001f)
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(3U, 3U, 192U,
-                                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"),
@@ -557,7 +542,7 @@
 
         SubStream i_b(graph);
         i_b << ConvolutionLayer(1U, 1U, 256U,
-                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -566,7 +551,7 @@
                                        0.001f)
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(7U, 1U, 256U,
-                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"),
@@ -575,7 +560,7 @@
                                        0.001f)
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(1U, 7U, 320U,
-                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"),
@@ -584,7 +569,7 @@
                                        0.001f)
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(3U, 3U, 320U,
-                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"),
@@ -599,13 +584,13 @@
         return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b), std::move(i_c));
     }
 
-    BranchLayer get_inceptionC_block(const std::string &data_path, std::string &&param_path)
+    BranchLayer get_inceptionC_block(const std::string &data_path, DataLayout weights_layout, std::string &&param_path)
     {
         std::string total_path = "/cnn_data/inceptionv4_model/" + param_path + "_";
 
         SubStream i_a(graph);
         i_a << ConvolutionLayer(1U, 1U, 256U,
-                                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
@@ -617,7 +602,7 @@
         SubStream i_b(graph);
         i_b << ConvolutionLayer(
                 1U, 1U, 384U,
-                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             << BatchNormalizationLayer(
@@ -628,10 +613,10 @@
                 0.001f)
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
 
-        SubStream i_b1(static_cast<IStream &>(i_b));
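+        // A SubStream forks its parent stream at the current tail node, so i_b1 and i_b2 both branch from the output of i_b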
+        SubStream i_b1(i_b);
         i_b1 << ConvolutionLayer(
                  3U, 1U, 256U,
-                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy"),
+                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy", weights_layout),
                  std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                  PadStrideInfo(1, 1, 1, 0))
              << BatchNormalizationLayer(
@@ -642,10 +627,10 @@
                  0.001f)
              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
 
-        SubStream i_b2(static_cast<IStream &>(i_b));
+        SubStream i_b2(i_b);
         i_b2 << ConvolutionLayer(
                  1U, 3U, 256U,
-                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_weights.npy"),
+                 get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_weights.npy", weights_layout),
                  std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                  PadStrideInfo(1, 1, 0, 1))
              << BatchNormalizationLayer(
@@ -662,7 +647,7 @@
         SubStream i_c(graph);
         i_c << ConvolutionLayer(
                 1U, 1U, 384U,
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 0))
             << BatchNormalizationLayer(
@@ -674,7 +659,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(
                 1U, 3U, 448U,
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x1_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x1_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 0, 1))
             << BatchNormalizationLayer(
@@ -686,7 +671,7 @@
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
             << ConvolutionLayer(
                 3U, 1U, 512U,
-                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy"),
+                get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy", weights_layout),
                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                 PadStrideInfo(1, 1, 1, 0))
             << BatchNormalizationLayer(
@@ -697,10 +682,10 @@
                 0.001f)
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
 
-        SubStream i_c1(static_cast<IStream &>(i_c));
+        SubStream i_c1(i_c);
         i_c1 << ConvolutionLayer(
                  3U, 1U, 256U,
-                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_weights.npy"),
+                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_weights.npy", weights_layout),
                  std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                  PadStrideInfo(1, 1, 1, 0))
              << BatchNormalizationLayer(
@@ -711,10 +696,10 @@
                  0.001f)
              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
 
-        SubStream i_c2(static_cast<IStream &>(i_c));
+        SubStream i_c2(i_c);
         i_c2 << ConvolutionLayer(
                  1U, 3U, 256U,
-                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_weights.npy"),
+                 get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_weights.npy", weights_layout),
                  std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                  PadStrideInfo(1, 1, 0, 1))
              << BatchNormalizationLayer(
@@ -731,7 +716,7 @@
         SubStream i_d(graph);
         i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true))
             << ConvolutionLayer(1U, 1U, 256U,
-                                get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy"),
+                                get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
             << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"),
                                        get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"),
@@ -746,8 +731,10 @@
 
 /** Main program for Inception V4
  *
+ * @note To list all the possible arguments, run the binary with the --help option
+ *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments
  */
 int main(int argc, char **argv)
 {
diff --git a/examples/graph_lenet.cpp b/examples/graph_lenet.cpp
index 32c7582..6b9f302 100644
--- a/examples/graph_lenet.cpp
+++ b/examples/graph_lenet.cpp
@@ -22,13 +22,11 @@
  * SOFTWARE.
  */
 #include "arm_compute/graph.h"
-
 #include "support/ToolchainSupport.h"
+#include "utils/CommonGraphOptions.h"
 #include "utils/GraphUtils.h"
 #include "utils/Utils.h"
 
-#include <cstdlib>
-
 using namespace arm_compute::utils;
 using namespace arm_compute::graph::frontend;
 using namespace arm_compute::graph_utils;
@@ -41,87 +39,83 @@
 class GraphLenetExample : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    GraphLenetExample()
+        : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "LeNet")
     {
-        std::string  data_path;   /** Path to the trainable data */
-        unsigned int batches = 4; /** Number of batches */
-
-        // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
-        const int target      = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
-        Target    target_hint = set_target_hint(target);
-
-        FastMathHint fast_math_hint = FastMathHint::DISABLED;
-
+    }
+    bool do_setup(int argc, char **argv) override
+    {
         // Parse arguments
-        if(argc < 2)
+        cmd_parser.parse(argc, argv);
+
+        // Consume common parameters
+        common_params = consume_common_graph_parameters(common_opts);
+
+        // Return when help menu is requested
+        if(common_params.help)
         {
-            // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [batches] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
+            cmd_parser.print_help(argv[0]);
+            return false;
         }
-        else if(argc == 2)
-        {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [batches] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 3)
-        {
-            //Do something with argv[1]
-            data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " [path_to_data] [batches] [fast_math_hint]\n\n";
-            std::cout << "No number of batches where specified, thus will use the default : " << batches << "\n\n";
-        }
-        else if(argc == 4)
-        {
-            data_path = argv[2];
-            batches   = std::strtol(argv[3], nullptr, 0);
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [fast_math_hint]\n\n";
-            std::cout << "No fast math info provided: disabling fast math\n\n";
-        }
-        else
-        {
-            //Do something with argv[1] and argv[2]
-            data_path      = argv[2];
-            batches        = std::strtol(argv[3], nullptr, 0);
-            fast_math_hint = (std::strtol(argv[4], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
-        }
+
+        // Checks
+        ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph");
+
+        // Print parameter values
+        std::cout << common_params << std::endl;
+
+        // Get trainable parameters data path
+        std::string  data_path = common_params.data_path;
+        unsigned int batches   = 4; /** Number of batches */
+
+        // Create input descriptor
+        const TensorShape tensor_shape     = permute_shape(TensorShape(28U, 28U, 1U, batches), DataLayout::NCHW, common_params.data_layout);
+        TensorDescriptor  input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout);
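+        // Note: for an NHWC data layout, permute_shape reorders the NCHW shape (W, H, C, N) = (28, 28, 1, batches) into (C, W, H, N) = (1, 28, 28, batches)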
+
+        // Set weights trained layout
+        const DataLayout weights_layout = DataLayout::NCHW;
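+        // The trained .npy weight files are stored in NCHW; passing this layout to the accessors lets the loader account for an execution layout that differs (e.g. NHWC)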
 
         //conv1 << pool1 << conv2 << pool2 << fc1 << act1 << fc2 << smx
-        graph << target_hint
-              << fast_math_hint
-              << InputLayer(TensorDescriptor(TensorShape(28U, 28U, 1U, batches), DataType::F32), get_input_accessor(""))
+        graph << common_params.target
+              << common_params.fast_math_hint
+              << InputLayer(input_descriptor, get_input_accessor(common_params))
               << ConvolutionLayer(
                   5U, 5U, 20U,
-                  get_weights_accessor(data_path, "/cnn_data/lenet_model/conv1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/lenet_model/conv1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/lenet_model/conv1_b.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               .set_name("conv1")
               << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool1")
               << ConvolutionLayer(
                   5U, 5U, 50U,
-                  get_weights_accessor(data_path, "/cnn_data/lenet_model/conv2_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/lenet_model/conv2_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/lenet_model/conv2_b.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               .set_name("conv2")
               << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))).set_name("pool2")
               << FullyConnectedLayer(
                   500U,
-                  get_weights_accessor(data_path, "/cnn_data/lenet_model/ip1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/lenet_model/ip1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/lenet_model/ip1_b.npy"))
               .set_name("ip1")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu")
               << FullyConnectedLayer(
                   10U,
-                  get_weights_accessor(data_path, "/cnn_data/lenet_model/ip2_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/lenet_model/ip2_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/lenet_model/ip2_b.npy"))
               .set_name("ip2")
               << SoftmaxLayer().set_name("prob")
-              << OutputLayer(get_output_accessor(""));
+              << OutputLayer(get_output_accessor(common_params));
 
         // Finalize graph
         GraphConfig config;
-        config.use_tuner = (target == 2);
-        graph.finalize(target_hint, config);
+        config.num_threads = common_params.threads;
+        config.use_tuner   = common_params.enable_tuner;
+        config.tuner_file  = common_params.tuner_file;
+
+        graph.finalize(common_params.target, config);
+
+        return true;
     }
     void do_run() override
     {
@@ -130,13 +124,18 @@
     }
 
 private:
-    Stream graph{ 0, "LeNet" };
+    CommandLineParser  cmd_parser;
+    CommonGraphOptions common_opts;
+    CommonGraphParams  common_params;
+    Stream             graph;
 };
 
 /** Main program for LeNet
  *
+ * @note To list all the possible arguments, run the binary with the --help option
+ *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] batches, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments
  */
 int main(int argc, char **argv)
 {
diff --git a/examples/graph_mobilenet.cpp b/examples/graph_mobilenet.cpp
index 50dc024..50238ca 100644
--- a/examples/graph_mobilenet.cpp
+++ b/examples/graph_mobilenet.cpp
@@ -23,11 +23,11 @@
  */
 #include "arm_compute/graph.h"
 #include "support/ToolchainSupport.h"
+#include "utils/CommonGraphOptions.h"
 #include "utils/GraphUtils.h"
 #include "utils/Utils.h"
 
-#include <cstdlib>
-
+using namespace arm_compute;
 using namespace arm_compute::utils;
 using namespace arm_compute::graph::frontend;
 using namespace arm_compute::graph_utils;
@@ -35,96 +35,107 @@
 /** Example demonstrating how to implement MobileNet's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] data layout, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments
  */
 class GraphMobilenetExample : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    GraphMobilenetExample()
+        : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "MobileNetV1")
     {
-        std::string data_path; /* Path to the trainable data */
-        std::string image;     /* Image data */
-        std::string label;     /* Label data */
+        // Add model id option
+        model_id_opt = cmd_parser.add_option<SimpleOption<int>>("model-id", 0);
+        model_id_opt->set_help("Mobilenet model id (0: 1.0_224, else: 0.75_160)");
+    }
+    GraphMobilenetExample(const GraphMobilenetExample &) = delete;
+    GraphMobilenetExample &operator=(const GraphMobilenetExample &) = delete;
+    GraphMobilenetExample(GraphMobilenetExample &&)                 = default; // NOLINT
+    GraphMobilenetExample &operator=(GraphMobilenetExample &&) = default;      // NOLINT
+    ~GraphMobilenetExample() override                          = default;
+    bool do_setup(int argc, char **argv) override
+    {
+        // Parse arguments
+        cmd_parser.parse(argc, argv);
+
+        // Consume common parameters
+        common_params = consume_common_graph_parameters(common_opts);
+
+        // Return when help menu is requested
+        if(common_params.help)
+        {
+            cmd_parser.print_help(argv[0]);
+            return false;
+        }
+
+        // Checks
+        ARM_COMPUTE_EXIT_ON_MSG(common_params.data_type == DataType::F16 && common_params.target == Target::NEON, "F16 NEON not supported for this graph");
+
+        // Print parameter values
+        std::cout << common_params << std::endl;
+
+        // Get model parameters
+        int model_id = model_id_opt->value();
+
+        // Set input spatial size (the QASYMM8 model uses 224x224)
+        unsigned int spatial_size = (model_id == 0 || common_params.data_type == DataType::QASYMM8) ? 224 : 160;
+
+        // Create input descriptor
+        const TensorShape tensor_shape     = permute_shape(TensorShape(spatial_size, spatial_size, 3U, 1U), DataLayout::NCHW, common_params.data_layout);
+        TensorDescriptor  input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout);
+
+        // Set graph hints
+        graph << common_params.target
+              << DepthwiseConvolutionMethod::Optimized3x3
+              << common_params.fast_math_hint;
+
+        // Create core graph
+        if(arm_compute::is_data_type_float(common_params.data_type))
+        {
+            create_graph_float(input_descriptor, model_id);
+        }
+        else
+        {
+            create_graph_qasymm(input_descriptor);
+        }
+
+        // Create common tail
+        graph << ReshapeLayer(TensorShape(1001U)).set_name("Reshape")
+              << SoftmaxLayer().set_name("Softmax")
+              << OutputLayer(get_output_accessor(common_params, 5));
+
+        // Finalize graph
+        GraphConfig config;
+        config.num_threads = common_params.threads;
+        config.use_tuner   = common_params.enable_tuner;
+        config.tuner_file  = common_params.tuner_file;
+
+        graph.finalize(common_params.target, config);
+
+        return true;
+    }
+    void do_run() override
+    {
+        // Run graph
+        graph.run();
+    }
+
+private:
+    CommandLineParser  cmd_parser;
+    CommonGraphOptions common_opts;
+    SimpleOption<int> *model_id_opt{ nullptr };
+    CommonGraphParams  common_params;
+    Stream             graph;
+
+    void create_graph_float(TensorDescriptor &input_descriptor, int model_id)
+    {
+        float       depth_scale = (model_id == 0) ? 1.f : 0.75f;
+        std::string model_path  = (model_id == 0) ? "/cnn_data/mobilenet_v1_1_224_model/" : "/cnn_data/mobilenet_v1_075_160_model/";
 
         // Create a preprocessor object
         std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>();
 
-        // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
-        const int                  target                     = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
-        Target                     target_hint                = set_target_hint(target);
-        ConvolutionMethod          convolution_hint           = ConvolutionMethod::GEMM;
-        DepthwiseConvolutionMethod depthwise_convolution_hint = DepthwiseConvolutionMethod::OPTIMIZED_3x3;
-        FastMathHint               fast_math_hint             = FastMathHint::DISABLED;
-
-        // Set model to execute. 0 (MobileNetV1_1.0_224), 1 (MobileNetV1_0.75_160)
-        int model_id = (argc > 2) ? std::strtol(argv[2], nullptr, 10) : 0;
-        ARM_COMPUTE_ERROR_ON_MSG(model_id > 1, "Invalid model ID. Model must be 0 (MobileNetV1_1.0_224) or 1 (MobileNetV1_0.75_160)");
-        int layout_id = (argc > 3) ? std::strtol(argv[3], nullptr, 10) : 0;
-        ARM_COMPUTE_ERROR_ON_MSG(layout_id > 1, "Invalid layout ID. Layout must be 0 (NCHW) or 1 (NHWC)");
-
-        float            depth_scale           = (model_id == 0) ? 1.f : 0.75;
-        unsigned int     spatial_size          = (model_id == 0) ? 224 : 160;
-        std::string      model_path            = (model_id == 0) ? "/cnn_data/mobilenet_v1_1_224_model/" : "/cnn_data/mobilenet_v1_075_160_model/";
-        TensorDescriptor input_descriptor_nchw = TensorDescriptor(TensorShape(spatial_size, spatial_size, 3U, 1U), DataType::F32);
-        TensorDescriptor input_descriptor_nhwc = TensorDescriptor(TensorShape(3U, spatial_size, spatial_size, 1U), DataType::F32).set_layout(DataLayout::NHWC);
-        TensorDescriptor input_descriptor      = (layout_id == 0) ? input_descriptor_nchw : input_descriptor_nhwc;
-
-        // Parse arguments
-        if(argc < 2)
-        {
-            // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [model] [layout] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No model ID provided: using MobileNetV1_1.0_224\n\n";
-            std::cout << "No data layout provided: using NCHW\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 2)
-        {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [model] [layout] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No model ID provided: using MobileNetV1_1.0_224\n\n";
-            std::cout << "No data layout provided: using NCHW\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 3)
-        {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [layout] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No data layout provided: using NCHW\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 4)
-        {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 5)
-        {
-            data_path = argv[4];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No image provided: using random values\n\n";
-            std::cout << "No text file with labels provided: skipping output accessor\n\n";
-        }
-        else if(argc == 6)
-        {
-            data_path = argv[4];
-            image     = argv[5];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
-            std::cout << "No text file with labels provided: skipping output accessor\n\n";
-        }
-        else if(argc == 7)
-        {
-            data_path = argv[4];
-            image     = argv[5];
-            label     = argv[6];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
-            std::cout << "No fast math info provided: disabling fast math\n\n";
-        }
-        else
-        {
-            data_path      = argv[4];
-            image          = argv[5];
-            label          = argv[6];
-            fast_math_hint = (std::strtol(argv[7], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
-        }
+        // Get trainable parameters data path
+        std::string data_path = common_params.data_path;
 
         // Add model path to data path
         if(!data_path.empty())
@@ -132,12 +143,8 @@
             data_path += model_path;
         }
 
-        graph << target_hint
-              << convolution_hint
-              << depthwise_convolution_hint
-              << fast_math_hint
-              << InputLayer(input_descriptor,
-                            get_input_accessor(image, std::move(preprocessor), false))
+        graph << InputLayer(input_descriptor,
+                            get_input_accessor(common_params, std::move(preprocessor), false))
               << ConvolutionLayer(
                   3U, 3U, 32U * depth_scale,
                   get_weights_accessor(data_path, "Conv2d_0_weights.npy", DataLayout::NCHW),
@@ -152,47 +159,122 @@
                   0.001f)
               .set_name("Conv2d_0/BatchNorm")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name("Conv2d_0/Relu6");
-        graph << get_dwsc_node(data_path, "Conv2d_1", 64 * depth_scale, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0));
-        graph << get_dwsc_node(data_path, "Conv2d_2", 128 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
-        graph << get_dwsc_node(data_path, "Conv2d_3", 128 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
-        graph << get_dwsc_node(data_path, "Conv2d_4", 256 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
-        graph << get_dwsc_node(data_path, "Conv2d_5", 256 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
-        graph << get_dwsc_node(data_path, "Conv2d_6", 512 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
-        graph << get_dwsc_node(data_path, "Conv2d_7", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
-        graph << get_dwsc_node(data_path, "Conv2d_8", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
-        graph << get_dwsc_node(data_path, "Conv2d_9", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
-        graph << get_dwsc_node(data_path, "Conv2d_10", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
-        graph << get_dwsc_node(data_path, "Conv2d_11", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
-        graph << get_dwsc_node(data_path, "Conv2d_12", 1024 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
-        graph << get_dwsc_node(data_path, "Conv2d_13", 1024 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+        graph << get_dwsc_node_float(data_path, "Conv2d_1", 64 * depth_scale, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0));
+        graph << get_dwsc_node_float(data_path, "Conv2d_2", 128 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+        graph << get_dwsc_node_float(data_path, "Conv2d_3", 128 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+        graph << get_dwsc_node_float(data_path, "Conv2d_4", 256 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+        graph << get_dwsc_node_float(data_path, "Conv2d_5", 256 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+        graph << get_dwsc_node_float(data_path, "Conv2d_6", 512 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+        graph << get_dwsc_node_float(data_path, "Conv2d_7", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+        graph << get_dwsc_node_float(data_path, "Conv2d_8", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+        graph << get_dwsc_node_float(data_path, "Conv2d_9", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+        graph << get_dwsc_node_float(data_path, "Conv2d_10", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+        graph << get_dwsc_node_float(data_path, "Conv2d_11", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+        graph << get_dwsc_node_float(data_path, "Conv2d_12", 1024 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
+        graph << get_dwsc_node_float(data_path, "Conv2d_13", 1024 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0));
         graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)).set_name("Logits/AvgPool_1a")
               << ConvolutionLayer(
                   1U, 1U, 1001U,
                   get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_weights.npy", DataLayout::NCHW),
                   get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_biases.npy"),
                   PadStrideInfo(1, 1, 0, 0))
-              .set_name("Logits/Conv2d_1c_1x1")
-              << ReshapeLayer(TensorShape(1001U)).set_name("Reshape")
-              << SoftmaxLayer().set_name("Softmax")
-              << OutputLayer(get_output_accessor(label, 5));
-
-        // Finalize graph
-        GraphConfig config;
-        config.use_tuner = (target == 2);
-        graph.finalize(target_hint, config);
+              .set_name("Logits/Conv2d_1c_1x1");
     }
-    void do_run() override
+
+    void create_graph_qasymm(TensorDescriptor &input_descriptor)
     {
-        // Run graph
-        graph.run();
+        // Get trainable parameters data path
+        std::string data_path = common_params.data_path;
+
+        // Quantization info taken from the AndroidNN QASYMM8 MobileNet example
+        const QuantizationInfo in_quant_info  = QuantizationInfo(0.0078125f, 128);
+        const QuantizationInfo mid_quant_info = QuantizationInfo(0.0784313753247f, 128);
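+        // For QASYMM8, QuantizationInfo(scale, offset) maps a quantized value q to the real value scale * (q - offset)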
+
+        const std::vector<QuantizationInfo> conv_weights_quant_info =
+        {
+            QuantizationInfo(0.031778190285f, 156), // conv0
+            QuantizationInfo(0.00604454148561f, 66) // conv14
+        };
+
+        const std::vector<QuantizationInfo> depth_weights_quant_info =
+        {
+            QuantizationInfo(0.254282623529f, 129),  // dwsc1
+            QuantizationInfo(0.12828284502f, 172),   // dwsc2
+            QuantizationInfo(0.265911251307f, 83),   // dwsc3
+            QuantizationInfo(0.0985597148538f, 30),  // dwsc4
+            QuantizationInfo(0.0631204470992f, 54),  // dwsc5
+            QuantizationInfo(0.0137207424268f, 141), // dwsc6
+            QuantizationInfo(0.0817828401923f, 125), // dwsc7
+            QuantizationInfo(0.0393880493939f, 164), // dwsc8
+            QuantizationInfo(0.211694166064f, 129),  // dwsc9
+            QuantizationInfo(0.158015936613f, 103),  // dwsc10
+            QuantizationInfo(0.0182712618262f, 137), // dwsc11
+            QuantizationInfo(0.0127998134121f, 134), // dwsc12
+            QuantizationInfo(0.299285322428f, 161)   // dwsc13
+        };
+
+        const std::vector<QuantizationInfo> point_weights_quant_info =
+        {
+            QuantizationInfo(0.0425766184926f, 129),  // dwsc1
+            QuantizationInfo(0.0250773020089f, 94),   // dwsc2
+            QuantizationInfo(0.015851572156f, 93),    // dwsc3
+            QuantizationInfo(0.0167811904103f, 98),   // dwsc4
+            QuantizationInfo(0.00951790809631f, 135), // dwsc5
+            QuantizationInfo(0.00999817531556f, 128), // dwsc6
+            QuantizationInfo(0.00590536883101f, 126), // dwsc7
+            QuantizationInfo(0.00576109671965f, 133), // dwsc8
+            QuantizationInfo(0.00830461271107f, 142), // dwsc9
+            QuantizationInfo(0.0152327232063f, 72),   // dwsc10
+            QuantizationInfo(0.00741417845711f, 125), // dwsc11
+            QuantizationInfo(0.0135628981516f, 142),  // dwsc12
+            QuantizationInfo(0.0338749065995f, 140)   // dwsc13
+        };
+
+        graph << InputLayer(input_descriptor.set_quantization_info(in_quant_info),
+                            get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/" + common_params.image))
+              << ConvolutionLayer(
+                  3U, 3U, 32U,
+                  get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Conv2d_0_weights.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Conv2d_0_bias.npy"),
+                  PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR),
+                  1, conv_weights_quant_info.at(0), mid_quant_info)
+              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f));
+        graph << get_dwsc_node_qasymm(data_path, "Conv2d_1", 64U, PadStrideInfo(1U, 1U, 1U, 1U), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(0), point_weights_quant_info.at(0));
+        graph << get_dwsc_node_qasymm(data_path, "Conv2d_2", 128U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(1),
+                                      point_weights_quant_info.at(1));
+        graph << get_dwsc_node_qasymm(data_path, "Conv2d_3", 128U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(2),
+                                      point_weights_quant_info.at(2));
+        graph << get_dwsc_node_qasymm(data_path, "Conv2d_4", 256U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(3),
+                                      point_weights_quant_info.at(3));
+        graph << get_dwsc_node_qasymm(data_path, "Conv2d_5", 256U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(4),
+                                      point_weights_quant_info.at(4));
+        graph << get_dwsc_node_qasymm(data_path, "Conv2d_6", 512U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(5),
+                                      point_weights_quant_info.at(5));
+        graph << get_dwsc_node_qasymm(data_path, "Conv2d_7", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(6),
+                                      point_weights_quant_info.at(6));
+        graph << get_dwsc_node_qasymm(data_path, "Conv2d_8", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(7),
+                                      point_weights_quant_info.at(7));
+        graph << get_dwsc_node_qasymm(data_path, "Conv2d_9", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(8),
+                                      point_weights_quant_info.at(8));
+        graph << get_dwsc_node_qasymm(data_path, "Conv2d_10", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(9),
+                                      point_weights_quant_info.at(9));
+        graph << get_dwsc_node_qasymm(data_path, "Conv2d_11", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(10),
+                                      point_weights_quant_info.at(10));
+        graph << get_dwsc_node_qasymm(data_path, "Conv2d_12", 1024U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(11),
+                                      point_weights_quant_info.at(11));
+        graph << get_dwsc_node_qasymm(data_path, "Conv2d_13", 1024U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(12),
+                                      point_weights_quant_info.at(12))
+              << PoolingLayer(PoolingLayerInfo(PoolingType::AVG))
+              << ConvolutionLayer(
+                  1U, 1U, 1001U,
+                  get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Logits_Conv2d_1c_1x1_weights.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Logits_Conv2d_1c_1x1_bias.npy"),
+                  PadStrideInfo(1U, 1U, 0U, 0U), 1, conv_weights_quant_info.at(1));
     }
 
-private:
-    Stream graph{ 0, "MobileNetV1" };
-
-    BranchLayer get_dwsc_node(const std::string &data_path, std::string &&param_path,
-                              unsigned int  conv_filt,
-                              PadStrideInfo dwc_pad_stride_info, PadStrideInfo conv_pad_stride_info)
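+    /** This function produces a depthwise separable convolution node (i.e. depthwise + pointwise layers) with ReLU6 activation after each layer, for float data types.
+     *
+     * @param[in] data_path            Path to trainable data folder
+     * @param[in] param_path           Prefix of specific set of weights/biases data
+     * @param[in] conv_filt            Filters depths for pointwise convolution
+     * @param[in] dwc_pad_stride_info  PadStrideInfo for depthwise convolution
+     * @param[in] conv_pad_stride_info PadStrideInfo for pointwise convolution
+     *
+     * @return The complete dwsc node
+     */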
+    BranchLayer get_dwsc_node_float(const std::string &data_path, std::string &&param_path,
+                                    unsigned int  conv_filt,
+                                    PadStrideInfo dwc_pad_stride_info, PadStrideInfo conv_pad_stride_info)
     {
         std::string total_path = param_path + "_";
         SubStream   sg(graph);
@@ -227,18 +309,38 @@
 
         return BranchLayer(std::move(sg));
     }
+
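+    /** This function produces a depthwise separable convolution node (i.e. depthwise + pointwise layers) with ReLU6 activation after each layer.
+     *
+     * @param[in] data_path                Path to trainable data folder
+     * @param[in] param_path               Prefix of specific set of weights/biases data
+     * @param[in] conv_filt                Filters depths for pointwise convolution
+     * @param[in] dwc_pad_stride_info      PadStrideInfo for depthwise convolution
+     * @param[in] conv_pad_stride_info     PadStrideInfo for pointwise convolution
+     * @param[in] depth_weights_quant_info QuantizationInfo for depthwise convolution's weights
+     * @param[in] point_weights_quant_info QuantizationInfo for pointwise convolution's weights
+     *
+     * @return The complete dwsc node
+     */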
+    BranchLayer get_dwsc_node_qasymm(const std::string &data_path, std::string &&param_path,
+                                     const unsigned int conv_filt,
+                                     PadStrideInfo dwc_pad_stride_info, PadStrideInfo conv_pad_stride_info,
+                                     QuantizationInfo depth_weights_quant_info, QuantizationInfo point_weights_quant_info)
+    {
+        std::string total_path = "/cnn_data/mobilenet_qasymm8_model/" + param_path + "_";
+        SubStream   sg(graph);
+
+        sg << DepthwiseConvolutionLayer(
+               3U, 3U,
+               get_weights_accessor(data_path, total_path + "depthwise_weights.npy"),
+               get_weights_accessor(data_path, total_path + "depthwise_bias.npy"),
+               dwc_pad_stride_info, depth_weights_quant_info)
+           << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f))
+           << ConvolutionLayer(
+               1U, 1U, conv_filt,
+               get_weights_accessor(data_path, total_path + "pointwise_weights.npy"),
+               get_weights_accessor(data_path, total_path + "pointwise_bias.npy"),
+               conv_pad_stride_info, 1, point_weights_quant_info)
+           << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f));
+
+        return BranchLayer(std::move(sg));
+    }
 };
 
 /** Main program for MobileNetV1
  *
+ * @note To list all the possible arguments, run the binary with the --help option
+ *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner),
- *                             [optional] Model ID (0 = MobileNetV1_1.0_224, 1 = MobileNetV1_0.75_160),
- *                             [optional] Path to the weights folder,
- *                             [optional] image,
- *                             [optional] labels,
- *                             [optional] data layout,
- *                             [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments
  */
 int main(int argc, char **argv)
 {
diff --git a/examples/graph_mobilenet_qasymm8.cpp b/examples/graph_mobilenet_qasymm8.cpp
deleted file mode 100644
index 6cf0e48..0000000
--- a/examples/graph_mobilenet_qasymm8.cpp
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/graph.h"
-#include "support/ToolchainSupport.h"
-#include "utils/GraphUtils.h"
-#include "utils/Utils.h"
-
-#include <cstdlib>
-
-using namespace arm_compute;
-using namespace arm_compute::utils;
-using namespace arm_compute::graph::frontend;
-using namespace arm_compute::graph_utils;
-
-/** Example demonstrating how to implement QASYMM8 MobileNet's network using the Compute Library's graph API
- *
- * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] npy_input, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
- */
-class GraphMobileNetQASYMM8Example : public Example
-{
-public:
-    void do_setup(int argc, char **argv) override
-    {
-        std::string data_path; /* Path to the trainable data */
-        std::string input;     /* Image data */
-        std::string label;     /* Label data */
-
-        // Quantization info taken from the AndroidNN QASYMM8 MobileNet example
-        const QuantizationInfo in_quant_info  = QuantizationInfo(0.0078125f, 128);
-        const QuantizationInfo mid_quant_info = QuantizationInfo(0.0784313753247f, 128);
-
-        const std::vector<QuantizationInfo> conv_weights_quant_info =
-        {
-            QuantizationInfo(0.031778190285f, 156), // conv0
-            QuantizationInfo(0.00604454148561f, 66) // conv14
-        };
-
-        const std::vector<QuantizationInfo> depth_weights_quant_info =
-        {
-            QuantizationInfo(0.254282623529f, 129),  // dwsc1
-            QuantizationInfo(0.12828284502f, 172),   // dwsc2
-            QuantizationInfo(0.265911251307f, 83),   // dwsc3
-            QuantizationInfo(0.0985597148538f, 30),  // dwsc4
-            QuantizationInfo(0.0631204470992f, 54),  // dwsc5
-            QuantizationInfo(0.0137207424268f, 141), // dwsc6
-            QuantizationInfo(0.0817828401923f, 125), // dwsc7
-            QuantizationInfo(0.0393880493939f, 164), // dwsc8
-            QuantizationInfo(0.211694166064f, 129),  // dwsc9
-            QuantizationInfo(0.158015936613f, 103),  // dwsc10
-            QuantizationInfo(0.0182712618262f, 137), // dwsc11
-            QuantizationInfo(0.0127998134121f, 134), // dwsc12
-            QuantizationInfo(0.299285322428f, 161)   // dwsc13
-        };
-
-        const std::vector<QuantizationInfo> point_weights_quant_info =
-        {
-            QuantizationInfo(0.0425766184926f, 129),  // dwsc1
-            QuantizationInfo(0.0250773020089f, 94),   // dwsc2
-            QuantizationInfo(0.015851572156f, 93),    // dwsc3
-            QuantizationInfo(0.0167811904103f, 98),   // dwsc4
-            QuantizationInfo(0.00951790809631f, 135), // dwsc5
-            QuantizationInfo(0.00999817531556f, 128), // dwsc6
-            QuantizationInfo(0.00590536883101f, 126), // dwsc7
-            QuantizationInfo(0.00576109671965f, 133), // dwsc8
-            QuantizationInfo(0.00830461271107f, 142), // dwsc9
-            QuantizationInfo(0.0152327232063f, 72),   // dwsc10
-            QuantizationInfo(0.00741417845711f, 125), // dwsc11
-            QuantizationInfo(0.0135628981516f, 142),  // dwsc12
-            QuantizationInfo(0.0338749065995f, 140)   // dwsc13
-        };
-
-        // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
-        const int    target         = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
-        Target       target_hint    = set_target_hint(target);
-        FastMathHint fast_math_hint = FastMathHint::DISABLED;
-
-        // Parse arguments
-        if(argc < 2)
-        {
-            // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [npy_input] [labels] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 2)
-        {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [npy_input] [labels] [fast_math_hint]\n\n";
-            std::cout << "No input provided: using random values\n\n";
-        }
-        else if(argc == 4)
-        {
-            data_path = argv[2];
-            input     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
-            std::cout << "No text file with labels provided: skipping output accessor\n\n";
-        }
-        else if(argc == 5)
-        {
-            data_path = argv[2];
-            input     = argv[3];
-            label     = argv[4];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
-            std::cout << "No fast math info provided: disabling fast math\n\n";
-        }
-        else
-        {
-            data_path      = argv[2];
-            input          = argv[3];
-            label          = argv[4];
-            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
-        }
-
-        graph << target_hint
-              << DepthwiseConvolutionMethod::OPTIMIZED_3x3
-              << fast_math_hint
-              << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::QASYMM8, in_quant_info),
-                            get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/" + input))
-              << ConvolutionLayer(
-                  3U, 3U, 32U,
-                  get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Conv2d_0_weights.npy"),
-                  get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Conv2d_0_bias.npy"),
-                  PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR),
-                  1, conv_weights_quant_info.at(0), mid_quant_info)
-              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f));
-        graph << get_dwsc_node(data_path, "Conv2d_1", 64U, PadStrideInfo(1U, 1U, 1U, 1U), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(0), point_weights_quant_info.at(0));
-        graph << get_dwsc_node(data_path, "Conv2d_2", 128U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(1),
-                               point_weights_quant_info.at(1));
-        graph << get_dwsc_node(data_path, "Conv2d_3", 128U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(2),
-                               point_weights_quant_info.at(2));
-        graph << get_dwsc_node(data_path, "Conv2d_4", 256U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(3),
-                               point_weights_quant_info.at(3));
-        graph << get_dwsc_node(data_path, "Conv2d_5", 256U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(4),
-                               point_weights_quant_info.at(4));
-        graph << get_dwsc_node(data_path, "Conv2d_6", 512U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(5),
-                               point_weights_quant_info.at(5));
-        graph << get_dwsc_node(data_path, "Conv2d_7", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(6),
-                               point_weights_quant_info.at(6));
-        graph << get_dwsc_node(data_path, "Conv2d_8", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(7),
-                               point_weights_quant_info.at(7));
-        graph << get_dwsc_node(data_path, "Conv2d_9", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(8),
-                               point_weights_quant_info.at(8));
-        graph << get_dwsc_node(data_path, "Conv2d_10", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(9),
-                               point_weights_quant_info.at(9));
-        graph << get_dwsc_node(data_path, "Conv2d_11", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(10),
-                               point_weights_quant_info.at(10));
-        graph << get_dwsc_node(data_path, "Conv2d_12", 1024U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(11),
-                               point_weights_quant_info.at(11));
-        graph << get_dwsc_node(data_path, "Conv2d_13", 1024U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(12),
-                               point_weights_quant_info.at(12))
-              << PoolingLayer(PoolingLayerInfo(PoolingType::AVG))
-              << ConvolutionLayer(
-                  1U, 1U, 1001U,
-                  get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Logits_Conv2d_1c_1x1_weights.npy"),
-                  get_weights_accessor(data_path, "/cnn_data/mobilenet_qasymm8_model/Logits_Conv2d_1c_1x1_bias.npy"),
-                  PadStrideInfo(1U, 1U, 0U, 0U), 1, conv_weights_quant_info.at(1))
-              << ReshapeLayer(TensorShape(1001U))
-              << SoftmaxLayer()
-              << OutputLayer(get_output_accessor(label, 5));
-
-        // Finalize graph
-        GraphConfig config;
-        config.use_tuner = (target == 2);
-        graph.finalize(target_hint, config);
-    }
-    void do_run() override
-    {
-        // Run graph
-        graph.run();
-    }
-
-private:
-    Stream graph{ 0, "MobileNetV1_QASYMM8" };
-
-    /** This function produces a depthwise separable convolution node (i.e. depthwise + pointwise layers) with ReLU6 activation after each layer.
-     *
-     * @param[in] data_path                Path to trainable data folder
-     * @param[in] param_path               Prefix of specific set of weights/biases data
-     * @param[in] conv_filt                Filters depths for pointwise convolution
-     * @param[in] dwc_pad_stride_info      PadStrideInfo for depthwise convolution
-     * @param[in] conv_pad_stride_info     PadStrideInfo for pointwise convolution
-     * @param[in] depth_weights_quant_info QuantizationInfo for depthwise convolution's weights
-     * @param[in] point_weights_quant_info QuantizationInfo for pointwise convolution's weights
-     *
-     * @return The complete dwsc node
-     */
-    BranchLayer get_dwsc_node(const std::string &data_path, std::string &&param_path,
-                              const unsigned int conv_filt,
-                              PadStrideInfo dwc_pad_stride_info, PadStrideInfo conv_pad_stride_info,
-                              QuantizationInfo depth_weights_quant_info, QuantizationInfo point_weights_quant_info)
-    {
-        std::string total_path = "/cnn_data/mobilenet_qasymm8_model/" + param_path + "_";
-        SubStream   sg(graph);
-
-        sg << DepthwiseConvolutionLayer(
-               3U, 3U,
-               get_weights_accessor(data_path, total_path + "depthwise_weights.npy"),
-               get_weights_accessor(data_path, total_path + "depthwise_bias.npy"),
-               dwc_pad_stride_info, depth_weights_quant_info)
-           << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f))
-           << ConvolutionLayer(
-               1U, 1U, conv_filt,
-               get_weights_accessor(data_path, total_path + "pointwise_weights.npy"),
-               get_weights_accessor(data_path, total_path + "pointwise_bias.npy"),
-               conv_pad_stride_info, 1, point_weights_quant_info)
-           << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f));
-
-        return BranchLayer(std::move(sg));
-    }
-};
-/** Main program for MobileNetQASYMM8
- *
- * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Path to the weights folder, [optional] npy_input, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
- */
-int main(int argc, char **argv)
-{
-    return arm_compute::utils::run_example<GraphMobileNetQASYMM8Example>(argc, argv);
-}
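
The `get_dwsc_node` helper removed above is the heart of the quantized MobileNet: each block is a 3x3 depthwise convolution and a 1x1 pointwise convolution, each capped by a bounded ReLU6, with separate `QuantizationInfo` objects for the depthwise and pointwise weights. A minimal sketch of one such block, using only the frontend calls visible in this hunk; the scale/offset values are illustrative, not taken from the real model:

    // One depthwise-separable block as built by get_dwsc_node (sketch).
    // QuantizationInfo(scale, offset) values below are made up for illustration.
    SubStream sg(graph);
    sg << DepthwiseConvolutionLayer(
           3U, 3U,
           get_weights_accessor(data_path, total_path + "depthwise_weights.npy"),
           get_weights_accessor(data_path, total_path + "depthwise_bias.npy"),
           PadStrideInfo(1U, 1U, 1U, 1U),
           QuantizationInfo(0.03f, 128))    // depthwise weights quantization
       << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f))
       << ConvolutionLayer(
           1U, 1U, 64U,                     // pointwise filter count (conv_filt)
           get_weights_accessor(data_path, total_path + "pointwise_weights.npy"),
           get_weights_accessor(data_path, total_path + "pointwise_bias.npy"),
           PadStrideInfo(1U, 1U, 0U, 0U),
           1,                               // number of groups
           QuantizationInfo(0.005f, 100))   // pointwise weights quantization
       << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f));

LU_BOUNDED_RELU with an upper bound of 6.f is ReLU6, the activation MobileNet was trained with.
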
diff --git a/examples/graph_resnet50.cpp b/examples/graph_resnet50.cpp
index bafa9a5..e909955 100644
--- a/examples/graph_resnet50.cpp
+++ b/examples/graph_resnet50.cpp
@@ -23,11 +23,10 @@
  */
 #include "arm_compute/graph.h"
 #include "support/ToolchainSupport.h"
+#include "utils/CommonGraphOptions.h"
 #include "utils/GraphUtils.h"
 #include "utils/Utils.h"
 
-#include <cstdlib>
-
 using namespace arm_compute::utils;
 using namespace arm_compute::graph::frontend;
 using namespace arm_compute::graph_utils;
@@ -35,75 +34,58 @@
 /** Example demonstrating how to implement ResNet50 network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments
  */
 class GraphResNet50Example : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    GraphResNet50Example()
+        : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "ResNet50")
     {
-        std::string data_path; /* Path to the trainable data */
-        std::string image;     /* Image data */
-        std::string label;     /* Label data */
+    }
+    bool do_setup(int argc, char **argv) override
+    {
+        // Parse arguments
+        cmd_parser.parse(argc, argv);
+
+        // Consume common parameters
+        common_params = consume_common_graph_parameters(common_opts);
+
+        // Return when help menu is requested
+        if(common_params.help)
+        {
+            cmd_parser.print_help(argv[0]);
+            return false;
+        }
+
+        // Checks
+        ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph");
+        ARM_COMPUTE_EXIT_ON_MSG(common_params.data_type == DataType::F16 && common_params.target == Target::NEON, "F16 NEON not supported for this graph");
+
+        // Print parameter values
+        std::cout << common_params << std::endl;
+
+        // Get trainable parameters data path
+        std::string data_path = common_params.data_path;
 
         // Create a preprocessor object
         const std::array<float, 3> mean_rgb{ { 122.68f, 116.67f, 104.01f } };
         std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb,
                                                                                                                    false /* Do not convert to BGR */);
 
-        // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
-        const int    target         = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
-        Target       target_hint    = set_target_hint(target);
-        FastMathHint fast_math_hint = FastMathHint::DISABLED;
+        // Create input descriptor
+        const TensorShape tensor_shape     = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, common_params.data_layout);
+        TensorDescriptor  input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout);
 
-        // Parse arguments
-        if(argc < 2)
-        {
-            // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 2)
-        {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 3)
-        {
-            data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No image provided: using random values\n\n";
-        }
-        else if(argc == 4)
-        {
-            data_path = argv[2];
-            image     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
-            std::cout << "No text file with labels provided: skipping output accessor\n\n";
-        }
-        else if(argc == 5)
-        {
-            data_path = argv[2];
-            image     = argv[3];
-            label     = argv[4];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
-            std::cout << "No fast math info provided: disabling fast math\n\n";
-        }
-        else
-        {
-            data_path      = argv[2];
-            image          = argv[3];
-            label          = argv[4];
-            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
-        }
+        // Set weights trained layout
+        const DataLayout weights_layout = DataLayout::NCHW;
 
-        graph << target_hint
-              << fast_math_hint
-              << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32),
-                            get_input_accessor(image, std::move(preprocessor), false /* Do not convert to BGR */))
+        graph << common_params.target
+              << common_params.fast_math_hint
+              << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false /* Do not convert to BGR */))
               << ConvolutionLayer(
                   7U, 7U, 64U,
-                  get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_weights.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_weights.npy", weights_layout),
                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                   PadStrideInfo(2, 2, 3, 3))
               .set_name("conv1/convolution")
@@ -117,26 +99,29 @@
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1/Relu")
               << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))).set_name("pool1/MaxPool");
 
-        add_residual_block(data_path, "block1", 64, 3, 2);
-        add_residual_block(data_path, "block2", 128, 4, 2);
-        add_residual_block(data_path, "block3", 256, 6, 2);
-        add_residual_block(data_path, "block4", 512, 3, 1);
+        add_residual_block(data_path, "block1", weights_layout, 64, 3, 2);
+        add_residual_block(data_path, "block2", weights_layout, 128, 4, 2);
+        add_residual_block(data_path, "block3", weights_layout, 256, 6, 2);
+        add_residual_block(data_path, "block4", weights_layout, 512, 3, 1);
 
         graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)).set_name("pool5")
               << ConvolutionLayer(
                   1U, 1U, 1000U,
-                  get_weights_accessor(data_path, "/cnn_data/resnet50_model/logits_weights.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/resnet50_model/logits_weights.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/resnet50_model/logits_biases.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               .set_name("logits/convolution")
               << FlattenLayer().set_name("predictions/Reshape")
               << SoftmaxLayer().set_name("predictions/Softmax")
-              << OutputLayer(get_output_accessor(label, 5));
+              << OutputLayer(get_output_accessor(common_params, 5));
 
         // Finalize graph
         GraphConfig config;
-        config.use_tuner = (target == 2);
-        graph.finalize(target_hint, config);
+        config.num_threads = common_params.threads;
+        config.use_tuner   = common_params.enable_tuner;
+        graph.finalize(common_params.target, config);
+
+        return true;
     }
 
     void do_run() override
@@ -146,9 +131,13 @@
     }
 
 private:
-    Stream graph{ 0, "ResNet50" };
+    CommandLineParser  cmd_parser;
+    CommonGraphOptions common_opts;
+    CommonGraphParams  common_params;
+    Stream             graph;
 
-    void add_residual_block(const std::string &data_path, const std::string &name, unsigned int base_depth, unsigned int num_units, unsigned int stride)
+    void add_residual_block(const std::string &data_path, const std::string &name, DataLayout weights_layout,
+                            unsigned int base_depth, unsigned int num_units, unsigned int stride)
     {
         for(unsigned int i = 0; i < num_units; ++i)
         {
@@ -170,7 +159,7 @@
             SubStream right(graph);
             right << ConvolutionLayer(
                       1U, 1U, base_depth,
-                      get_weights_accessor(data_path, unit_path + "conv1_weights.npy"),
+                      get_weights_accessor(data_path, unit_path + "conv1_weights.npy", weights_layout),
                       std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                       PadStrideInfo(1, 1, 0, 0))
                   .set_name(unit_name + "conv1/convolution")
@@ -185,7 +174,7 @@
 
                   << ConvolutionLayer(
                       3U, 3U, base_depth,
-                      get_weights_accessor(data_path, unit_path + "conv2_weights.npy"),
+                      get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout),
                       std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                       PadStrideInfo(middle_stride, middle_stride, 1, 1))
                   .set_name(unit_name + "conv2/convolution")
@@ -200,7 +189,7 @@
 
                   << ConvolutionLayer(
                       1U, 1U, base_depth * 4,
-                      get_weights_accessor(data_path, unit_path + "conv3_weights.npy"),
+                      get_weights_accessor(data_path, unit_path + "conv3_weights.npy", weights_layout),
                       std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                       PadStrideInfo(1, 1, 0, 0))
                   .set_name(unit_name + "conv3/convolution")
@@ -217,7 +206,7 @@
                 SubStream left(graph);
                 left << ConvolutionLayer(
                          1U, 1U, base_depth * 4,
-                         get_weights_accessor(data_path, unit_path + "shortcut_weights.npy"),
+                         get_weights_accessor(data_path, unit_path + "shortcut_weights.npy", weights_layout),
                          std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                          PadStrideInfo(1, 1, 0, 0))
                      .set_name(unit_name + "shortcut/convolution")
@@ -251,8 +240,10 @@
 
 /** Main program for ResNet50
  *
+ * @note To list all the possible arguments execute the binary appended with the --help option
+ *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments
  */
 int main(int argc, char **argv)
 {
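
Every example ported by this patch follows the same setup skeleton, so the per-network hunks differ mainly in the layer stack: parse the command line, consume the common parameters, bail out early when --help was requested, then build and finalize the graph from those parameters. Condensed from the ResNet50 hunks above:

    // Shared do_setup() flow of the ported graph examples (sketch).
    bool do_setup(int argc, char **argv) override
    {
        cmd_parser.parse(argc, argv);
        common_params = consume_common_graph_parameters(common_opts);
        if(common_params.help)
        {
            cmd_parser.print_help(argv[0]);
            return false;                          // setup stopped: only help was printed
        }
        std::cout << common_params << std::endl;   // echo the effective parameters

        // ... stream the network layers into 'graph' ...

        GraphConfig config;
        config.num_threads = common_params.threads;
        config.use_tuner   = common_params.enable_tuner;
        graph.finalize(common_params.target, config);
        return true;
    }

This is also why do_setup changes from void to bool throughout the patch: returning false presumably lets the example runner skip do_run() when setup ended at the help menu.
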
diff --git a/examples/graph_resnext50.cpp b/examples/graph_resnext50.cpp
index f96a02e..8f8e4a9 100644
--- a/examples/graph_resnext50.cpp
+++ b/examples/graph_resnext50.cpp
@@ -23,11 +23,10 @@
  */
 #include "arm_compute/graph.h"
 #include "support/ToolchainSupport.h"
+#include "utils/CommonGraphOptions.h"
 #include "utils/GraphUtils.h"
 #include "utils/Utils.h"
 
-#include <cstdlib>
-
 using namespace arm_compute::utils;
 using namespace arm_compute::graph::frontend;
 using namespace arm_compute::graph_utils;
@@ -35,92 +34,80 @@
 /** Example demonstrating how to implement ResNeXt50 network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] npy_in, [optional] npy_out, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments
  */
 class GraphResNeXt50Example : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    GraphResNeXt50Example()
+        : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "ResNeXt50")
     {
-        std::string data_path; /* Path to the trainable data */
-        std::string npy_in;    /* Input npy data */
-        std::string npy_out;   /* Output npy data */
-
-        // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
-        const int    target         = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
-        Target       target_hint    = set_target_hint(target);
-        FastMathHint fast_math_hint = FastMathHint::DISABLED;
-
+    }
+    bool do_setup(int argc, char **argv) override
+    {
         // Parse arguments
-        if(argc < 2)
+        cmd_parser.parse(argc, argv);
+
+        // Consume common parameters
+        common_params = consume_common_graph_parameters(common_opts);
+
+        // Return when help menu is requested
+        if(common_params.help)
         {
-            // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [npy_in] [npy_out] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 2)
-        {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [npy_in] [npy_out] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 3)
-        {
-            data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [npy_in] [npy_out] [fast_math_hint]\n\n";
-            std::cout << "No input npy file provided: using random values\n\n";
-        }
-        else if(argc == 4)
-        {
-            data_path = argv[2];
-            npy_in    = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [npy_out] [fast_math_hint]\n\n";
-            std::cout << "No output npy file provided: skipping output accessor\n\n";
-        }
-        else if(argc == 5)
-        {
-            data_path = argv[2];
-            npy_in    = argv[3];
-            npy_out   = argv[4];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
-            std::cout << "No fast math info provided: disabling fast math\n\n";
-        }
-        else
-        {
-            data_path      = argv[2];
-            npy_in         = argv[3];
-            npy_out        = argv[4];
-            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
+            cmd_parser.print_help(argv[0]);
+            return false;
         }
 
-        graph << target_hint
-              << fast_math_hint
-              << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32),
-                            get_input_accessor(npy_in))
+        // Checks
+        ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph");
+        ARM_COMPUTE_EXIT_ON_MSG(common_params.data_type == DataType::F16 && common_params.target == Target::NEON, "F16 NEON not supported for this graph");
+
+        // Print parameter values
+        std::cout << common_params << std::endl;
+
+        // Get trainable parameters data path
+        std::string data_path = common_params.data_path;
+
+        // Create input descriptor
+        const TensorShape tensor_shape     = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, common_params.data_layout);
+        TensorDescriptor  input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout);
+
+        // Set weights trained layout
+        const DataLayout weights_layout = DataLayout::NCHW;
+
+        graph << common_params.target
+              << common_params.fast_math_hint
+              << InputLayer(input_descriptor, get_input_accessor(common_params))
               << ScaleLayer(get_weights_accessor(data_path, "/cnn_data/resnext50_model/bn_data_mul.npy"),
                             get_weights_accessor(data_path, "/cnn_data/resnext50_model/bn_data_add.npy"))
               .set_name("bn_data/Scale")
               << ConvolutionLayer(
                   7U, 7U, 64U,
-                  get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_weights.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_weights.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_biases.npy"),
                   PadStrideInfo(2, 2, 2, 3, 2, 3, DimensionRoundingType::FLOOR))
               .set_name("conv0/Convolution")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv0/Relu")
               << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))).set_name("pool0");
 
-        add_residual_block(data_path, /*ofm*/ 256, /*stage*/ 1, /*num_unit*/ 3, /*stride_conv_unit1*/ 1);
-        add_residual_block(data_path, 512, 2, 4, 2);
-        add_residual_block(data_path, 1024, 3, 6, 2);
-        add_residual_block(data_path, 2048, 4, 3, 2);
+        add_residual_block(data_path, weights_layout, /*ofm*/ 256, /*stage*/ 1, /*num_unit*/ 3, /*stride_conv_unit1*/ 1);
+        add_residual_block(data_path, weights_layout, 512, 2, 4, 2);
+        add_residual_block(data_path, weights_layout, 1024, 3, 6, 2);
+        add_residual_block(data_path, weights_layout, 2048, 4, 3, 2);
 
         graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG)).set_name("pool1")
               << FlattenLayer().set_name("predictions/Reshape")
-              << OutputLayer(get_npy_output_accessor(npy_out, TensorShape(2048U), DataType::F32));
+              << OutputLayer(get_npy_output_accessor(common_params.labels, TensorShape(2048U), DataType::F32));
 
         // Finalize graph
         GraphConfig config;
-        config.use_tuner = (target == 2);
-        graph.finalize(target_hint, config);
+        config.num_threads = common_params.threads;
+        config.use_tuner   = common_params.enable_tuner;
+        config.tuner_file  = common_params.tuner_file;
+
+        graph.finalize(common_params.target, config);
+
+        return true;
     }
 
     void do_run() override
@@ -130,9 +117,13 @@
     }
 
 private:
-    Stream graph{ 0, "ResNeXt50" };
+    CommandLineParser  cmd_parser;
+    CommonGraphOptions common_opts;
+    CommonGraphParams  common_params;
+    Stream             graph;
 
-    void add_residual_block(const std::string &data_path, unsigned int base_depth, unsigned int stage, unsigned int num_units, unsigned int stride_conv_unit1)
+    void add_residual_block(const std::string &data_path, DataLayout weights_layout,
+                            unsigned int base_depth, unsigned int stage, unsigned int num_units, unsigned int stride_conv_unit1)
     {
         for(unsigned int i = 0; i < num_units; ++i)
         {
@@ -153,7 +144,7 @@
             SubStream right(graph);
             right << ConvolutionLayer(
                       1U, 1U, base_depth / 2,
-                      get_weights_accessor(data_path, unit_path + "conv1_weights.npy"),
+                      get_weights_accessor(data_path, unit_path + "conv1_weights.npy", weights_layout),
                       get_weights_accessor(data_path, unit_path + "conv1_biases.npy"),
                       PadStrideInfo(1, 1, 0, 0))
                   .set_name(unit_name + "conv1/convolution")
@@ -161,7 +152,7 @@
 
                   << ConvolutionLayer(
                       3U, 3U, base_depth / 2,
-                      get_weights_accessor(data_path, unit_path + "conv2_weights.npy"),
+                      get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout),
                       std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                       pad_grouped_conv, 32)
                   .set_name(unit_name + "conv2/convolution")
@@ -172,7 +163,7 @@
 
                   << ConvolutionLayer(
                       1U, 1U, base_depth,
-                      get_weights_accessor(data_path, unit_path + "conv3_weights.npy"),
+                      get_weights_accessor(data_path, unit_path + "conv3_weights.npy", weights_layout),
                       get_weights_accessor(data_path, unit_path + "conv3_biases.npy"),
                       PadStrideInfo(1, 1, 0, 0))
                   .set_name(unit_name + "conv3/convolution");
@@ -182,7 +173,7 @@
             {
                 left << ConvolutionLayer(
                          1U, 1U, base_depth,
-                         get_weights_accessor(data_path, unit_path + "sc_weights.npy"),
+                         get_weights_accessor(data_path, unit_path + "sc_weights.npy", weights_layout),
                          std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                          PadStrideInfo(stride_conv_unit1, stride_conv_unit1, 0, 0))
                      .set_name(unit_name + "sc/convolution")
@@ -199,8 +190,10 @@
 
 /** Main program for ResNeXt50
  *
+ * @note To list all the possible arguments execute the binary appended with the --help option
+ *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [[optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] npy_in, [optional] npy_out )
+ * @param[in] argv Arguments
  */
 int main(int argc, char **argv)
 {
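
ResNeXt's signature piece is the grouped 3x3 convolution inside each unit; in this frontend the group count is the argument after the PadStrideInfo, as in the conv2 line above (`pad_grouped_conv, 32`). A sketch with illustrative numbers: at base_depth = 256 the grouped convolution spans 128 feature maps split into 32 groups, so each group convolves 4 channels, which is the cardinality-32 construction from the ResNeXt paper:

    // Grouped convolution sketch; the padding/stride values are illustrative,
    // the real code derives pad_grouped_conv from the unit's stride.
    right << ConvolutionLayer(
              3U, 3U, 128U,                 // base_depth / 2 output feature maps
              get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout),
              std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),   // no bias
              PadStrideInfo(1, 1, 1, 1),
              32)                           // number of groups (cardinality)
          << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
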
diff --git a/examples/graph_squeezenet.cpp b/examples/graph_squeezenet.cpp
index b632688..6cdb9de 100644
--- a/examples/graph_squeezenet.cpp
+++ b/examples/graph_squeezenet.cpp
@@ -23,165 +23,150 @@
  */
 #include "arm_compute/graph.h"
 #include "support/ToolchainSupport.h"
+#include "utils/CommonGraphOptions.h"
 #include "utils/GraphUtils.h"
 #include "utils/Utils.h"
 
-#include <cstdlib>
-#include <tuple>
-
 using namespace arm_compute::utils;
 using namespace arm_compute::graph::frontend;
 using namespace arm_compute::graph_utils;
-using namespace arm_compute::logging;
 
 /** Example demonstrating how to implement Squeezenet's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments
  */
 class GraphSqueezenetExample : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    GraphSqueezenetExample()
+        : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "SqueezeNetV1")
     {
-        std::string data_path; /* Path to the trainable data */
-        std::string image;     /* Image data */
-        std::string label;     /* Label data */
+    }
+    bool do_setup(int argc, char **argv) override
+    {
+        // Parse arguments
+        cmd_parser.parse(argc, argv);
+
+        // Consume common parameters
+        common_params = consume_common_graph_parameters(common_opts);
+
+        // Return when help menu is requested
+        if(common_params.help)
+        {
+            cmd_parser.print_help(argv[0]);
+            return false;
+        }
+
+        // Checks
+        ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph");
+        ARM_COMPUTE_EXIT_ON_MSG(common_params.data_type == DataType::F16 && common_params.target == Target::NEON, "F16 NEON not supported for this graph");
+
+        // Print parameter values
+        std::cout << common_params << std::endl;
+
+        // Get trainable parameters data path
+        std::string data_path = common_params.data_path;
 
         // Create a preprocessor object
         const std::array<float, 3> mean_rgb{ { 122.68f, 116.67f, 104.01f } };
         std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb);
 
-        // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
-        const int    target         = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
-        Target       target_hint    = set_target_hint(target);
-        FastMathHint fast_math_hint = FastMathHint::DISABLED;
+        // Create input descriptor
+        const TensorShape tensor_shape     = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, common_params.data_layout);
+        TensorDescriptor  input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout);
 
-        // Parse arguments
-        if(argc < 2)
-        {
-            // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 2)
-        {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 3)
-        {
-            data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No image provided: using random values\n\n";
-        }
-        else if(argc == 4)
-        {
-            data_path = argv[2];
-            image     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
-            std::cout << "No text file with labels provided: skipping output accessor\n\n";
-        }
-        else if(argc == 5)
-        {
-            data_path = argv[2];
-            image     = argv[3];
-            label     = argv[4];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
-            std::cout << "No fast math info provided: disabling fast math\n\n";
-        }
-        else
-        {
-            data_path      = argv[2];
-            image          = argv[3];
-            label          = argv[4];
-            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
-        }
+        // Set weights trained layout
+        const DataLayout weights_layout = DataLayout::NCHW;
 
-        graph << target_hint
-              << fast_math_hint
-              << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32),
-                            get_input_accessor(image, std::move(preprocessor)))
+        graph << common_params.target
+              << common_params.fast_math_hint
+              << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor)))
               << ConvolutionLayer(
                   7U, 7U, 96U,
-                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv1_b.npy"),
                   PadStrideInfo(2, 2, 0, 0))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
               << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
               << ConvolutionLayer(
                   1U, 1U, 16U,
-                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_b.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
-        graph << get_expand_fire_node(data_path, "fire2", 64U, 64U);
+        graph << get_expand_fire_node(data_path, "fire2", weights_layout, 64U, 64U);
         graph << ConvolutionLayer(
                   1U, 1U, 16U,
-                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire3_squeeze1x1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire3_squeeze1x1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire3_squeeze1x1_b.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
-        graph << get_expand_fire_node(data_path, "fire3", 64U, 64U);
+        graph << get_expand_fire_node(data_path, "fire3", weights_layout, 64U, 64U);
         graph << ConvolutionLayer(
                   1U, 1U, 32U,
-                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire4_squeeze1x1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire4_squeeze1x1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire4_squeeze1x1_b.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
-        graph << get_expand_fire_node(data_path, "fire4", 128U, 128U);
+        graph << get_expand_fire_node(data_path, "fire4", weights_layout, 128U, 128U);
         graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
               << ConvolutionLayer(
                   1U, 1U, 32U,
-                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire5_squeeze1x1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire5_squeeze1x1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire5_squeeze1x1_b.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
-        graph << get_expand_fire_node(data_path, "fire5", 128U, 128U);
+        graph << get_expand_fire_node(data_path, "fire5", weights_layout, 128U, 128U);
         graph << ConvolutionLayer(
                   1U, 1U, 48U,
-                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire6_squeeze1x1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire6_squeeze1x1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire6_squeeze1x1_b.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
-        graph << get_expand_fire_node(data_path, "fire6", 192U, 192U);
+        graph << get_expand_fire_node(data_path, "fire6", weights_layout, 192U, 192U);
         graph << ConvolutionLayer(
                   1U, 1U, 48U,
-                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire7_squeeze1x1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire7_squeeze1x1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire7_squeeze1x1_b.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
-        graph << get_expand_fire_node(data_path, "fire7", 192U, 192U);
+        graph << get_expand_fire_node(data_path, "fire7", weights_layout, 192U, 192U);
         graph << ConvolutionLayer(
                   1U, 1U, 64U,
-                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire8_squeeze1x1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire8_squeeze1x1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire8_squeeze1x1_b.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
-        graph << get_expand_fire_node(data_path, "fire8", 256U, 256U);
+        graph << get_expand_fire_node(data_path, "fire8", weights_layout, 256U, 256U);
         graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
               << ConvolutionLayer(
                   1U, 1U, 64U,
-                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire9_squeeze1x1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire9_squeeze1x1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire9_squeeze1x1_b.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
-        graph << get_expand_fire_node(data_path, "fire9", 256U, 256U);
+        graph << get_expand_fire_node(data_path, "fire9", weights_layout, 256U, 256U);
         graph << ConvolutionLayer(
                   1U, 1U, 1000U,
-                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv10_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv10_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv10_b.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
               << PoolingLayer(PoolingLayerInfo(PoolingType::AVG))
               << FlattenLayer()
               << SoftmaxLayer()
-              << OutputLayer(get_output_accessor(label, 5));
+              << OutputLayer(get_output_accessor(common_params, 5));
 
         // Finalize graph
         GraphConfig config;
-        config.use_tuner = (target == 2);
-        graph.finalize(target_hint, config);
+        config.num_threads = common_params.threads;
+        config.use_tuner   = common_params.enable_tuner;
+        config.tuner_file  = common_params.tuner_file;
+
+        graph.finalize(common_params.target, config);
+
+        return true;
     }
     void do_run() override
     {
@@ -190,15 +175,19 @@
     }
 
 private:
-    Stream graph{ 0, "SqueezeNetV1" };
+    CommandLineParser  cmd_parser;
+    CommonGraphOptions common_opts;
+    CommonGraphParams  common_params;
+    Stream             graph;
 
-    BranchLayer get_expand_fire_node(const std::string &data_path, std::string &&param_path, unsigned int expand1_filt, unsigned int expand3_filt)
+    BranchLayer get_expand_fire_node(const std::string &data_path, std::string &&param_path, DataLayout weights_layout,
+                                     unsigned int expand1_filt, unsigned int expand3_filt)
     {
         std::string total_path = "/cnn_data/squeezenet_v1.0_model/" + param_path + "_";
         SubStream   i_a(graph);
         i_a << ConvolutionLayer(
                 1U, 1U, expand1_filt,
-                get_weights_accessor(data_path, total_path + "expand1x1_w.npy"),
+                get_weights_accessor(data_path, total_path + "expand1x1_w.npy", weights_layout),
                 get_weights_accessor(data_path, total_path + "expand1x1_b.npy"),
                 PadStrideInfo(1, 1, 0, 0))
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
@@ -206,7 +195,7 @@
         SubStream i_b(graph);
         i_b << ConvolutionLayer(
                 3U, 3U, expand3_filt,
-                get_weights_accessor(data_path, total_path + "expand3x3_w.npy"),
+                get_weights_accessor(data_path, total_path + "expand3x3_w.npy", weights_layout),
                 get_weights_accessor(data_path, total_path + "expand3x3_b.npy"),
                 PadStrideInfo(1, 1, 1, 1))
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
@@ -217,8 +206,10 @@
 
 /** Main program for Squeezenet v1.0
  *
+ * @note To list all the possible arguments execute the binary appended with the --help option
+ *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments
  */
 int main(int argc, char **argv)
 {
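
The fire module's shape is unchanged by this patch: a 1x1 squeeze convolution feeds two parallel expand branches (1x1, and 3x3 padded to preserve spatial size) whose outputs are joined along the channel axis. A sketch of the branch-and-merge inside get_expand_fire_node; the final DEPTH_CONCATENATE merge is not visible in these hunks, so take that line as an assumption about the surrounding code:

    SubStream i_a(graph);                   // 1x1 expand branch
    i_a << ConvolutionLayer(1U, 1U, expand1_filt,
                            get_weights_accessor(data_path, total_path + "expand1x1_w.npy", weights_layout),
                            get_weights_accessor(data_path, total_path + "expand1x1_b.npy"),
                            PadStrideInfo(1, 1, 0, 0))
        << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    SubStream i_b(graph);                   // 3x3 expand branch, pad 1 keeps W/H
    i_b << ConvolutionLayer(3U, 3U, expand3_filt,
                            get_weights_accessor(data_path, total_path + "expand3x3_w.npy", weights_layout),
                            get_weights_accessor(data_path, total_path + "expand3x3_b.npy"),
                            PadStrideInfo(1, 1, 1, 1))
        << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

    // Assumed merge: concatenate both branches along the channel dimension.
    return BranchLayer(BranchMergeMethod::DEPTH_CONCATENATE, std::move(i_a), std::move(i_b));
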
diff --git a/examples/graph_squeezenet_v1_1.cpp b/examples/graph_squeezenet_v1_1.cpp
index 9e3466b..f0b2b84 100644
--- a/examples/graph_squeezenet_v1_1.cpp
+++ b/examples/graph_squeezenet_v1_1.cpp
@@ -23,170 +23,150 @@
  */
 #include "arm_compute/graph.h"
 #include "support/ToolchainSupport.h"
+#include "utils/CommonGraphOptions.h"
 #include "utils/GraphUtils.h"
 #include "utils/Utils.h"
 
-#include <cstdlib>
-#include <tuple>
-
 using namespace arm_compute::utils;
 using namespace arm_compute::graph::frontend;
 using namespace arm_compute::graph_utils;
 
-namespace
-{
-} // namespace
-
 /** Example demonstrating how to implement Squeezenet's v1.1 network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments
  */
 class GraphSqueezenet_v1_1Example : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    GraphSqueezenet_v1_1Example()
+        : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "SqueezeNetV1.1")
     {
-        std::string data_path; /* Path to the trainable data */
-        std::string image;     /* Image data */
-        std::string label;     /* Label data */
+    }
+    bool do_setup(int argc, char **argv) override
+    {
+        // Parse arguments
+        cmd_parser.parse(argc, argv);
+
+        // Consume common parameters
+        common_params = consume_common_graph_parameters(common_opts);
+
+        // Return when help menu is requested
+        if(common_params.help)
+        {
+            cmd_parser.print_help(argv[0]);
+            return false;
+        }
+
+        // Checks
+        ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph");
+        ARM_COMPUTE_EXIT_ON_MSG(common_params.data_type == DataType::F16 && common_params.target == Target::NEON, "F16 NEON not supported for this graph");
+
+        // Print parameter values
+        std::cout << common_params << std::endl;
+
+        // Get trainable parameters data path
+        std::string data_path = common_params.data_path;
 
         // Create a preprocessor object
         const std::array<float, 3> mean_rgb{ { 122.68f, 116.67f, 104.01f } };
         std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb);
 
-        // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
-        const int    target         = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
-        Target       target_hint    = set_target_hint(target);
-        FastMathHint fast_math_hint = FastMathHint::DISABLED;
+        // Create input descriptor
+        const TensorShape tensor_shape     = permute_shape(TensorShape(227U, 227U, 3U, 1U), DataLayout::NCHW, common_params.data_layout);
+        TensorDescriptor  input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout);
 
-        // Parse arguments
-        if(argc < 2)
-        {
-            // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 2)
-        {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 3)
-        {
-            data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No image provided: using random values\n\n";
-        }
-        else if(argc == 4)
-        {
-            data_path = argv[2];
-            image     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
-            std::cout << "No text file with labels provided: skipping output accessor\n\n";
-        }
-        else if(argc == 5)
-        {
-            data_path = argv[2];
-            image     = argv[3];
-            label     = argv[4];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
-            std::cout << "No fast math info provided: disabling fast math\n\n";
-        }
-        else
-        {
-            data_path      = argv[2];
-            image          = argv[3];
-            label          = argv[4];
-            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
-        }
+        // Set weights trained layout
+        const DataLayout weights_layout = DataLayout::NCHW;
 
-        graph << target_hint
-              << fast_math_hint
-              << InputLayer(TensorDescriptor(TensorShape(227U, 227U, 3U, 1U), DataType::F32),
-                            get_input_accessor(image, std::move(preprocessor)))
-              << ConvolutionMethod::DIRECT
+        graph << common_params.target
+              << common_params.fast_math_hint
+              << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor)))
               << ConvolutionLayer(
                   3U, 3U, 64U,
-                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv1_b.npy"),
                   PadStrideInfo(2, 2, 0, 0))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
               << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
-              << ConvolutionMethod::DEFAULT
               << ConvolutionLayer(
                   1U, 1U, 16U,
-                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire2_squeeze1x1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire2_squeeze1x1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire2_squeeze1x1_b.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
-        graph << get_expand_fire_node(data_path, "fire2", 64U, 64U);
+        graph << get_expand_fire_node(data_path, "fire2", weights_layout, 64U, 64U);
         graph << ConvolutionLayer(
                   1U, 1U, 16U,
-                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire3_squeeze1x1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire3_squeeze1x1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire3_squeeze1x1_b.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
-        graph << get_expand_fire_node(data_path, "fire3", 64U, 64U);
+        graph << get_expand_fire_node(data_path, "fire3", weights_layout, 64U, 64U);
         graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
               << ConvolutionLayer(
                   1U, 1U, 32U,
-                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire4_squeeze1x1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire4_squeeze1x1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire4_squeeze1x1_b.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
-        graph << get_expand_fire_node(data_path, "fire4", 128U, 128U);
+        graph << get_expand_fire_node(data_path, "fire4", weights_layout, 128U, 128U);
         graph << ConvolutionLayer(
                   1U, 1U, 32U,
-                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire5_squeeze1x1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire5_squeeze1x1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire5_squeeze1x1_b.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
-        graph << get_expand_fire_node(data_path, "fire5", 128U, 128U);
+        graph << get_expand_fire_node(data_path, "fire5", weights_layout, 128U, 128U);
         graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
               << ConvolutionLayer(
                   1U, 1U, 48U,
-                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire6_squeeze1x1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire6_squeeze1x1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire6_squeeze1x1_b.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
-        graph << get_expand_fire_node(data_path, "fire6", 192U, 192U);
+        graph << get_expand_fire_node(data_path, "fire6", weights_layout, 192U, 192U);
         graph << ConvolutionLayer(
                   1U, 1U, 48U,
-                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire7_squeeze1x1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire7_squeeze1x1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire7_squeeze1x1_b.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
-        graph << get_expand_fire_node(data_path, "fire7", 192U, 192U);
+        graph << get_expand_fire_node(data_path, "fire7", weights_layout, 192U, 192U);
         graph << ConvolutionLayer(
                   1U, 1U, 64U,
-                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire8_squeeze1x1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire8_squeeze1x1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire8_squeeze1x1_b.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
-        graph << get_expand_fire_node(data_path, "fire8", 256U, 256U);
+        graph << get_expand_fire_node(data_path, "fire8", weights_layout, 256U, 256U);
         graph << ConvolutionLayer(
                   1U, 1U, 64U,
-                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire9_squeeze1x1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire9_squeeze1x1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire9_squeeze1x1_b.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
-        graph << get_expand_fire_node(data_path, "fire9", 256U, 256U);
+        graph << get_expand_fire_node(data_path, "fire9", weights_layout, 256U, 256U);
         graph << ConvolutionLayer(
                   1U, 1U, 1000U,
-                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv10_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv10_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv10_b.npy"),
                   PadStrideInfo(1, 1, 0, 0))
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
               << PoolingLayer(PoolingLayerInfo(PoolingType::AVG))
               << FlattenLayer()
               << SoftmaxLayer()
-              << OutputLayer(get_output_accessor(label, 5));
+              << OutputLayer(get_output_accessor(common_params, 5));
 
         // Finalize graph
         GraphConfig config;
-        config.use_tuner = (target == 2);
-        graph.finalize(target_hint, config);
+        config.num_threads = common_params.threads;
+        config.use_tuner   = common_params.enable_tuner;
+        config.tuner_file  = common_params.tuner_file;
+
+        graph.finalize(common_params.target, config);
+
+        return true;
     }
     void do_run() override
     {
@@ -195,15 +175,19 @@
     }
 
 private:
-    Stream graph{ 0, "SqueezeNetV1.1" };
+    CommandLineParser  cmd_parser;
+    CommonGraphOptions common_opts;
+    CommonGraphParams  common_params;
+    Stream             graph;
 
-    BranchLayer get_expand_fire_node(const std::string &data_path, std::string &&param_path, unsigned int expand1_filt, unsigned int expand3_filt)
+    BranchLayer get_expand_fire_node(const std::string &data_path, std::string &&param_path, DataLayout weights_layout,
+                                     unsigned int expand1_filt, unsigned int expand3_filt)
     {
         std::string total_path = "/cnn_data/squeezenet_v1_1_model/" + param_path + "_";
         SubStream   i_a(graph);
         i_a << ConvolutionLayer(
                 1U, 1U, expand1_filt,
-                get_weights_accessor(data_path, total_path + "expand1x1_w.npy"),
+                get_weights_accessor(data_path, total_path + "expand1x1_w.npy", weights_layout),
                 get_weights_accessor(data_path, total_path + "expand1x1_b.npy"),
                 PadStrideInfo(1, 1, 0, 0))
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
@@ -211,7 +195,7 @@
         SubStream i_b(graph);
         i_b << ConvolutionLayer(
                 3U, 3U, expand3_filt,
-                get_weights_accessor(data_path, total_path + "expand3x3_w.npy"),
+                get_weights_accessor(data_path, total_path + "expand3x3_w.npy", weights_layout),
                 get_weights_accessor(data_path, total_path + "expand3x3_b.npy"),
                 PadStrideInfo(1, 1, 1, 1))
             << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
@@ -222,8 +206,10 @@
 
 /** Main program for Squeezenet v1.1
  *
+ * @note To list all the possible arguments execute the binary appended with the --help option
+ *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments
  */
 int main(int argc, char **argv)
 {
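
Across the reworked graph examples, the positional target == 2 tuner toggle gives way to explicit GraphConfig fields filled in from the parsed common parameters. A minimal sketch of the shared finalize step, with comments added here for clarity; the field meanings are inferred from how the examples use them:

    // Sketch only: the finalize pattern common to the reworked graph examples.
    GraphConfig config;
    config.num_threads = common_params.threads;      // CPU threads used when executing on NEON
    config.use_tuner   = common_params.enable_tuner; // turn the OpenCL tuner on or off
    config.tuner_file  = common_params.tuner_file;   // where tuning results are loaded/stored
    graph.finalize(common_params.target, config);    // target parsed from the command line, not argv[1]
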
diff --git a/examples/graph_vgg16.cpp b/examples/graph_vgg16.cpp
index 72e7240..69b3a9d 100644
--- a/examples/graph_vgg16.cpp
+++ b/examples/graph_vgg16.cpp
@@ -23,11 +23,10 @@
  */
 #include "arm_compute/graph.h"
 #include "support/ToolchainSupport.h"
+#include "utils/CommonGraphOptions.h"
 #include "utils/GraphUtils.h"
 #include "utils/Utils.h"
 
-#include <cstdlib>
-
 using namespace arm_compute::utils;
 using namespace arm_compute::graph::frontend;
 using namespace arm_compute::graph_utils;
@@ -35,89 +34,66 @@
 /** Example demonstrating how to implement VGG16's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments
  */
 class GraphVGG16Example : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    GraphVGG16Example()
+        : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "VGG16")
     {
-        std::string data_path; /* Path to the trainable data */
-        std::string image;     /* Image data */
-        std::string label;     /* Label data */
+    }
+    bool do_setup(int argc, char **argv) override
+    {
+        // Parse arguments
+        cmd_parser.parse(argc, argv);
+
+        // Consume common parameters
+        common_params = consume_common_graph_parameters(common_opts);
+
+        // Return when help menu is requested
+        if(common_params.help)
+        {
+            cmd_parser.print_help(argv[0]);
+            return false;
+        }
+
+        // Checks
+        ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph");
+
+        // Print parameter values
+        std::cout << common_params << std::endl;
+
+        // Get trainable parameters data path
+        std::string data_path = common_params.data_path;
 
         // Create a preprocessor object
         const std::array<float, 3> mean_rgb{ { 123.68f, 116.779f, 103.939f } };
         std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb);
 
-        // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
-        const int  target      = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
-        Target     target_hint = set_target_hint(target);
-        const bool is_opencl   = target_hint == Target::CL;
+        // Create input descriptor
+        const TensorShape tensor_shape     = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, common_params.data_layout);
+        TensorDescriptor  input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout);
 
-        ConvolutionMethod first_convolution3x3_hint = is_opencl ? ConvolutionMethod::DIRECT : ConvolutionMethod::GEMM;
-        ConvolutionMethod convolution3x3_hint       = ConvolutionMethod::DEFAULT;
-        FastMathHint      fast_math_hint            = FastMathHint::DISABLED;
+        // Set weights trained layout
+        const DataLayout weights_layout = DataLayout::NCHW;
 
-        // Parse arguments
-        if(argc < 2)
-        {
-            // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 2)
-        {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 3)
-        {
-            data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No image provided: using random values\n\n";
-        }
-        else if(argc == 4)
-        {
-            data_path = argv[2];
-            image     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
-            std::cout << "No text file with labels provided: skipping output accessor\n\n";
-        }
-        else if(argc == 5)
-        {
-            data_path = argv[2];
-            image     = argv[3];
-            label     = argv[4];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
-            std::cout << "No fast math info provided: disabling fast math\n\n";
-        }
-        else
-        {
-            data_path      = argv[2];
-            image          = argv[3];
-            label          = argv[4];
-            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
-        }
-
-        graph << target_hint
-              << fast_math_hint
-              << first_convolution3x3_hint
-              << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32),
-                            get_input_accessor(image, std::move(preprocessor)))
+        // Create graph
+        graph << common_params.target
+              << common_params.fast_math_hint
+              << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor)))
               // Layer 1
               << ConvolutionLayer(
                   3U, 3U, 64U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_1_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv1_1")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_1/Relu")
-              << convolution3x3_hint
               // Layer 2
               << ConvolutionLayer(
                   3U, 3U, 64U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_2_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_2_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_2_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv1_2")
@@ -126,7 +102,7 @@
               // Layer 3
               << ConvolutionLayer(
                   3U, 3U, 128U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_1_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv2_1")
@@ -134,7 +110,7 @@
               // Layer 4
               << ConvolutionLayer(
                   3U, 3U, 128U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_2_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_2_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_2_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv2_2")
@@ -143,7 +119,7 @@
               // Layer 5
               << ConvolutionLayer(
                   3U, 3U, 256U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_1_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv3_1")
@@ -151,7 +127,7 @@
               // Layer 6
               << ConvolutionLayer(
                   3U, 3U, 256U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_2_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_2_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_2_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv3_2")
@@ -159,7 +135,7 @@
               // Layer 7
               << ConvolutionLayer(
                   3U, 3U, 256U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_3_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_3_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_3_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv3_3")
@@ -168,7 +144,7 @@
               // Layer 8
               << ConvolutionLayer(
                   3U, 3U, 512U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_1_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv4_1")
@@ -176,7 +152,7 @@
               // Layer 9
               << ConvolutionLayer(
                   3U, 3U, 512U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_2_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_2_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_2_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv4_2")
@@ -184,7 +160,7 @@
               // Layer 10
               << ConvolutionLayer(
                   3U, 3U, 512U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_3_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_3_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_3_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv4_3")
@@ -193,7 +169,7 @@
               // Layer 11
               << ConvolutionLayer(
                   3U, 3U, 512U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_1_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv5_1")
@@ -201,7 +177,7 @@
               // Layer 12
               << ConvolutionLayer(
                   3U, 3U, 512U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_2_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_2_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_2_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv5_2")
@@ -209,7 +185,7 @@
               // Layer 13
               << ConvolutionLayer(
                   3U, 3U, 512U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_3_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_3_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_3_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv5_3")
@@ -218,31 +194,36 @@
               // Layer 14
               << FullyConnectedLayer(
                   4096U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc6_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc6_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc6_b.npy"))
               .set_name("fc6")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu")
               // Layer 15
               << FullyConnectedLayer(
                   4096U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc7_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc7_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc7_b.npy"))
               .set_name("fc7")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu_1")
               // Layer 16
               << FullyConnectedLayer(
                   1000U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc8_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc8_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc8_b.npy"))
               .set_name("fc8")
               // Softmax
               << SoftmaxLayer().set_name("prob")
-              << OutputLayer(get_output_accessor(label, 5));
+              << OutputLayer(get_output_accessor(common_params, 5));
 
         // Finalize graph
         GraphConfig config;
-        config.use_tuner = (target == 2);
-        graph.finalize(target_hint, config);
+        config.num_threads = common_params.threads;
+        config.use_tuner   = common_params.enable_tuner;
+        config.tuner_file  = common_params.tuner_file;
+
+        graph.finalize(common_params.target, config);
+
+        return true;
     }
     void do_run() override
     {
@@ -251,13 +232,18 @@
     }
 
 private:
-    Stream graph{ 0, "VGG16" };
+    CommandLineParser  cmd_parser;
+    CommonGraphOptions common_opts;
+    CommonGraphParams  common_params;
+    Stream             graph;
 };
 
 /** Main program for VGG16
  *
+ * @note To list all the possible arguments execute the binary appended with the --help option
+ *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments
  */
 int main(int argc, char **argv)
 {
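
The VGG16 rewrite also stops hard-coding an NCHW/F32 input: the descriptor is now derived from the layout and data type requested on the command line, while the .npy weights keep their trained NCHW layout and each accessor is told so, letting the loader reorder them when the graph runs in NHWC. An annotated sketch of that idea, assuming permute_shape() rearranges a shape from a source layout to a destination layout:

    // Sketch: build the input descriptor for whichever layout the user requested.
    // In NCHW the shape reads (W=224, H=224, C=3, N=1); permute_shape() reorders
    // the dimensions when common_params.data_layout is NHWC.
    const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U),
                                                   DataLayout::NCHW, common_params.data_layout);
    TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type)
                                        .set_layout(common_params.data_layout);

    // The weights were trained in NCHW, so every weights accessor is tagged:
    const DataLayout weights_layout = DataLayout::NCHW;
    get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_1_w.npy", weights_layout);
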
diff --git a/examples/graph_vgg19.cpp b/examples/graph_vgg19.cpp
index b15c3f2..8d64c1c 100644
--- a/examples/graph_vgg19.cpp
+++ b/examples/graph_vgg19.cpp
@@ -23,100 +23,74 @@
  */
 #include "arm_compute/graph.h"
 #include "support/ToolchainSupport.h"
+#include "utils/CommonGraphOptions.h"
 #include "utils/GraphUtils.h"
 #include "utils/Utils.h"
 
-#include <cstdlib>
-
 using namespace arm_compute::utils;
 using namespace arm_compute::graph::frontend;
 using namespace arm_compute::graph_utils;
-
 /** Example demonstrating how to implement VGG19's network using the Compute Library's graph API
  *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments
  */
 class GraphVGG19Example : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    GraphVGG19Example()
+        : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "VGG19")
     {
-        std::string data_path; /* Path to the trainable data */
-        std::string image;     /* Image data */
-        std::string label;     /* Label data */
+    }
+    bool do_setup(int argc, char **argv) override
+    {
+        // Parse arguments
+        cmd_parser.parse(argc, argv);
+
+        // Consume common parameters
+        common_params = consume_common_graph_parameters(common_opts);
+
+        // Return when help menu is requested
+        if(common_params.help)
+        {
+            cmd_parser.print_help(argv[0]);
+            return false;
+        }
+
+        // Checks
+        ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph");
+
+        // Print parameter values
+        std::cout << common_params << std::endl;
+
+        // Get trainable parameters data path
+        std::string data_path = common_params.data_path;
 
         // Create a preprocessor object
         const std::array<float, 3> mean_rgb{ { 123.68f, 116.779f, 103.939f } };
         std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb);
 
-        // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON
-        const int    target         = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0;
-        Target       target_hint    = set_target_hint(target);
-        FastMathHint fast_math_hint = FastMathHint::DISABLED;
-        const bool   is_opencl      = target_hint == Target::CL;
+        // Create input descriptor
+        const TensorShape tensor_shape     = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, common_params.data_layout);
+        TensorDescriptor  input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout);
 
-        ConvolutionMethod first_convolution3x3_hint = is_opencl ? ConvolutionMethod::DIRECT : ConvolutionMethod::GEMM;
-        ConvolutionMethod convolution3x3_hint       = ConvolutionMethod::DEFAULT;
+        // Set weights trained layout
+        const DataLayout weights_layout = DataLayout::NCHW;
 
-        // Parse arguments
-        if(argc < 2)
-        {
-            // Print help
-            std::cout << "Usage: " << argv[0] << " [target] [path_to_data] [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 2)
-        {
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " [path_to_data] [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No data folder provided: using random values\n\n";
-        }
-        else if(argc == 3)
-        {
-            data_path = argv[2];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " [image] [labels] [fast_math_hint]\n\n";
-            std::cout << "No image provided: using random values\n\n";
-        }
-        else if(argc == 4)
-        {
-            data_path = argv[2];
-            image     = argv[3];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " [labels] [fast_math_hint]\n\n";
-            std::cout << "No text file with labels provided: skipping output accessor\n\n";
-        }
-        else if(argc == 5)
-        {
-            data_path = argv[2];
-            image     = argv[3];
-            label     = argv[4];
-            std::cout << "Usage: " << argv[0] << " " << argv[1] << " " << argv[2] << " " << argv[3] << " " << argv[4] << " [fast_math_hint]\n\n";
-            std::cout << "No fast math info provided: disabling fast math\n\n";
-        }
-        else
-        {
-            data_path      = argv[2];
-            image          = argv[3];
-            label          = argv[4];
-            fast_math_hint = (std::strtol(argv[5], nullptr, 1) == 0) ? FastMathHint::DISABLED : FastMathHint::ENABLED;
-        }
-
-        graph << target_hint
-              << first_convolution3x3_hint
-              << fast_math_hint
-              << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32),
-                            get_input_accessor(image, std::move(preprocessor)))
+        graph << common_params.target
+              << common_params.fast_math_hint
+              << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor)))
               // Layer 1
               << ConvolutionLayer(
                   3U, 3U, 64U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_1_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv1_1")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_1/Relu")
-              << convolution3x3_hint
               << ConvolutionLayer(
                   3U, 3U, 64U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_2_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_2_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_2_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv1_2")
@@ -125,14 +99,14 @@
               // Layer 2
               << ConvolutionLayer(
                   3U, 3U, 128U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_1_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv2_1")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2_1/Relu")
               << ConvolutionLayer(
                   3U, 3U, 128U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_2_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_2_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_2_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv2_2")
@@ -141,28 +115,28 @@
               // Layer 3
               << ConvolutionLayer(
                   3U, 3U, 256U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_1_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv3_1")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_1/Relu")
               << ConvolutionLayer(
                   3U, 3U, 256U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_2_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_2_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_2_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv3_2")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_2/Relu")
               << ConvolutionLayer(
                   3U, 3U, 256U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_3_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_3_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_3_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv3_3")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_3/Relu")
               << ConvolutionLayer(
                   3U, 3U, 256U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_4_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_4_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_4_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv3_4")
@@ -171,28 +145,28 @@
               // Layer 4
               << ConvolutionLayer(
                   3U, 3U, 512U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_1_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv4_1")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_1/Relu")
               << ConvolutionLayer(
                   3U, 3U, 512U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_2_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_2_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_2_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv4_2")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_2/Relu")
               << ConvolutionLayer(
                   3U, 3U, 512U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_3_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_3_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_3_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv4_3")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_3/Relu")
               << ConvolutionLayer(
                   3U, 3U, 512U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_4_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_4_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_4_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv4_4")
@@ -201,28 +175,28 @@
               // Layer 5
               << ConvolutionLayer(
                   3U, 3U, 512U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_1_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_1_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_1_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv5_1")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_1/Relu")
               << ConvolutionLayer(
                   3U, 3U, 512U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_2_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_2_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_2_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv5_2")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_2/Relu")
               << ConvolutionLayer(
                   3U, 3U, 512U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_3_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_3_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_3_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv5_3")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_3/Relu")
               << ConvolutionLayer(
                   3U, 3U, 512U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_4_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_4_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_4_b.npy"),
                   PadStrideInfo(1, 1, 1, 1))
               .set_name("conv5_4")
@@ -231,31 +205,36 @@
               // Layer 6
               << FullyConnectedLayer(
                   4096U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc6_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc6_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc6_b.npy"))
               .set_name("fc6")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu")
               // Layer 7
               << FullyConnectedLayer(
                   4096U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc7_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc7_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc7_b.npy"))
               .set_name("fc7")
               << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu_1")
               // Layer 8
               << FullyConnectedLayer(
                   1000U,
-                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc8_w.npy"),
+                  get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc8_w.npy", weights_layout),
                   get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc8_b.npy"))
               .set_name("fc8")
               // Softmax
               << SoftmaxLayer().set_name("prob")
-              << OutputLayer(get_output_accessor(label, 5));
+              << OutputLayer(get_output_accessor(common_params, 5));
 
         // Finalize graph
         GraphConfig config;
-        config.use_tuner = (target == 2);
-        graph.finalize(target_hint, config);
+        config.num_threads = common_params.threads;
+        config.use_tuner   = common_params.enable_tuner;
+        config.tuner_file  = common_params.tuner_file;
+
+        graph.finalize(common_params.target, config);
+
+        return true;
     }
     void do_run() override
     {
@@ -264,13 +243,18 @@
     }
 
 private:
-    Stream graph{ 0, "VGG19" };
+    CommandLineParser  cmd_parser;
+    CommonGraphOptions common_opts;
+    CommonGraphParams  common_params;
+    Stream             graph;
 };
 
 /** Main program for VGG19
  *
+ * @note To list all the possible arguments execute the binary appended with the --help option
+ *
  * @param[in] argc Number of arguments
- * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) )
+ * @param[in] argv Arguments
  */
 int main(int argc, char **argv)
 {
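
The constructor/parse/consume boilerplate above repeats in every reworked graph example, and the bool returned by do_setup() is what makes --help behave: returning false asks the example runner to skip do_run(). A condensed sketch of the contract; that runner behaviour is an assumption drawn from how these examples use the return value:

    // Sketch of the new setup contract, not copied verbatim from any one file.
    bool do_setup(int argc, char **argv) override
    {
        cmd_parser.parse(argc, argv);                                 // parse registered options
        common_params = consume_common_graph_parameters(common_opts); // extract the shared parameters
        if(common_params.help)
        {
            cmd_parser.print_help(argv[0]);
            return false;                                             // decline setup: do_run() is skipped
        }
        // ... validate parameters, build the graph, finalize ...
        return true;                                                  // proceed to do_run()
    }
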
diff --git a/examples/neon_cartoon_effect.cpp b/examples/neon_cartoon_effect.cpp
index da8ce3f..4285aa4 100644
--- a/examples/neon_cartoon_effect.cpp
+++ b/examples/neon_cartoon_effect.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2018 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,6 +25,7 @@
 #include "arm_compute/runtime/NEON/NEFunctions.h"
 
 #include "arm_compute/core/Types.h"
+#include "utils/ImageLoader.h"
 #include "utils/Utils.h"
 
 using namespace arm_compute;
@@ -33,7 +34,7 @@
 class NEONCartoonEffectExample : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    bool do_setup(int argc, char **argv) override
     {
         // Open PPM file
         PPMLoader ppm;
@@ -74,6 +75,8 @@
             ppm.fill_image(src_img);
             output_filename = std::string(argv[1]) + "_out.ppm";
         }
+
+        return true;
     }
 
     void do_run() override
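
From here on the non-graph utility examples receive the same two mechanical changes: do_setup() now returns bool (always true, since these examples fall back to random input rather than failing), and utils/ImageLoader.h is included explicitly; the image loaders appear to have been split out of utils/Utils.h, inferred from the include landing exactly where PPMLoader is used. The load pattern they share, sketched with comments:

    // Sketch: the loader pattern shared by the utility examples, assuming
    // PPMLoader now lives in utils/ImageLoader.h.
    PPMLoader ppm;
    if(argc > 1)
    {
        ppm.open(argv[1]);
        ppm.init_image(src, Format::U8); // size and format the tensor from the PPM header
    }
    src.allocator()->allocate();         // allocate backing memory
    if(ppm.is_open())
    {
        ppm.fill_image(src);             // copy the pixel data into the allocated tensor
    }
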
diff --git a/examples/neon_cnn.cpp b/examples/neon_cnn.cpp
index 05b6c83..1df8125 100644
--- a/examples/neon_cnn.cpp
+++ b/examples/neon_cnn.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2018 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,7 +36,7 @@
 class NEONCNNExample : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    bool do_setup(int argc, char **argv) override
     {
         ARM_COMPUTE_UNUSED(argc);
         ARM_COMPUTE_UNUSED(argv);
@@ -227,6 +227,8 @@
 
         // Finalize the manager. (Validity checks, memory allocations etc)
         mm_transitions->finalize();
+
+        return true;
     }
     void do_run() override
     {
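
neon_cnn takes no user input at all, so its do_setup() only gains the unused-argument markers and an unconditional success; a minimal sketch:

    // Sketch: examples that ignore the command line silence the parameters
    // and always report a successful setup.
    bool do_setup(int argc, char **argv) override
    {
        ARM_COMPUTE_UNUSED(argc); // essentially a void-cast, avoiding -Wunused-parameter
        ARM_COMPUTE_UNUSED(argv);
        // ... configure functions and memory managers ...
        return true;              // nothing user-provided can fail here
    }
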
diff --git a/examples/neon_convolution.cpp b/examples/neon_convolution.cpp
index d51d201..1a7e865 100644
--- a/examples/neon_convolution.cpp
+++ b/examples/neon_convolution.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2018 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,6 +24,7 @@
 #include "arm_compute/runtime/NEON/NEFunctions.h"
 
 #include "arm_compute/core/Types.h"
+#include "utils/ImageLoader.h"
 #include "utils/Utils.h"
 
 using namespace arm_compute;
@@ -52,7 +53,7 @@
 class NEONConvolutionExample : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    bool do_setup(int argc, char **argv) override
     {
         /** [Accurate padding] **/
         PPMLoader ppm;
@@ -93,6 +94,8 @@
             output_filename = std::string(argv[1]) + "_out.ppm";
         }
         /** [Accurate padding] **/
+
+        return true;
     }
     void do_run() override
     {
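
Note how the new return true lands outside the /** [Accurate padding] **/ pair: these are Doxygen snippet markers, so placing the return after the closing marker keeps the documented code region unchanged. Illustrative reference only, assuming the documentation pulls the region in with Doxygen's snippet command:

    // In a Doxygen page, the marked region would be included roughly as:
    //   \snippet neon_convolution.cpp Accurate padding
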
diff --git a/examples/neon_copy_objects.cpp b/examples/neon_copy_objects.cpp
index 9409cf3..84a2abd 100644
--- a/examples/neon_copy_objects.cpp
+++ b/examples/neon_copy_objects.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2018 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,7 +36,7 @@
 class NEONCopyObjectsExample : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    bool do_setup(int argc, char **argv) override
     {
         ARM_COMPUTE_UNUSED(argc);
         ARM_COMPUTE_UNUSED(argv);
@@ -135,6 +135,8 @@
         output_it);
 
         /** [Copy objects example] */
+
+        return true;
     }
     void do_run() override
     {
diff --git a/examples/neon_scale.cpp b/examples/neon_scale.cpp
index bec277d..b04d916 100644
--- a/examples/neon_scale.cpp
+++ b/examples/neon_scale.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2018 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,6 +24,7 @@
 #include "arm_compute/runtime/NEON/NEFunctions.h"
 
 #include "arm_compute/core/Types.h"
+#include "utils/ImageLoader.h"
 #include "utils/Utils.h"
 
 using namespace arm_compute;
@@ -32,7 +33,7 @@
 class NEONScaleExample : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    bool do_setup(int argc, char **argv) override
     {
         PPMLoader ppm;
 
@@ -71,6 +72,8 @@
             ppm.fill_image(src);
             output_filename = std::string(argv[1]) + "_out.ppm";
         }
+
+        return true;
     }
     void do_run() override
     {
diff --git a/examples/neoncl_scale_median_gaussian.cpp b/examples/neoncl_scale_median_gaussian.cpp
index 084005f..1b26517 100644
--- a/examples/neoncl_scale_median_gaussian.cpp
+++ b/examples/neoncl_scale_median_gaussian.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2018 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,6 +29,7 @@
 #include "arm_compute/runtime/CL/CLFunctions.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "arm_compute/runtime/NEON/NEFunctions.h"
+#include "utils/ImageLoader.h"
 #include "utils/Utils.h"
 
 using namespace arm_compute;
@@ -42,7 +43,7 @@
 class NEONCLScaleMedianGaussianExample : public Example
 {
 public:
-    void do_setup(int argc, char **argv) override
+    bool do_setup(int argc, char **argv) override
     {
         /** [NEON / OpenCL Interop] */
         PPMLoader ppm;
@@ -87,6 +88,8 @@
             const std::string output_filename = std::string(argv[1]) + "_out.ppm";
         }
         /** [NEON / OpenCL Interop] */
+
+        return true;
     }
     void do_run() override
     {