arm_compute v18.08
diff --git a/src/graph/Graph.cpp b/src/graph/Graph.cpp
index e1ffeed..88e2682 100644
--- a/src/graph/Graph.cpp
+++ b/src/graph/Graph.cpp
@@ -41,17 +41,24 @@
 
     std::unique_ptr<INode> &node = _nodes[nid];
 
-    // Remove node connections
     if(node)
     {
+        // Remove input connections
         for(auto &input_eid : node->_input_edges)
         {
             remove_connection(input_eid);
         }
-        for(auto &outpud_eid : node->_output_edges)
+
+        // Remove output connections (iterate over a copy, as remove_connection() mutates the edge set)
+        std::set<EdgeID> output_edges_copy = node->output_edges();
+        for(auto &outpud_eid : output_edges_copy)
         {
             remove_connection(outpud_eid);
         }
+
+        // Remove nid from tagged nodes
+        std::vector<NodeID> &tnodes = _tagged_nodes.at(node->type());
+        tnodes.erase(std::remove(tnodes.begin(), tnodes.end(), nid), tnodes.end());
     }
 
     node = nullptr;
@@ -164,9 +171,9 @@
     return _id;
 }
 
-const std::vector<NodeID> &Graph::inputs()
+const std::vector<NodeID> &Graph::nodes(NodeType type)
 {
-    return _tagged_nodes[NodeType::Input];
+    return _tagged_nodes[type];
 }
 
 std::vector<std::unique_ptr<INode>> &Graph::nodes()
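
The hunk above generalizes the old inputs() accessor into nodes(NodeType) and teaches node removal to clean up the per-type tag lists along with the edges. A minimal usage sketch follows; it assumes the patched function is Graph::remove_node(NodeID), which the hunk header does not name:

#include "arm_compute/graph/Graph.h"

using namespace arm_compute::graph;

void prune_const_nodes(Graph &g)
{
    // Copy the IDs first: remove_node() now erases entries from the same
    // tagged-node vector that nodes(NodeType) returns a reference to.
    const std::vector<NodeID> const_ids = g.nodes(NodeType::Const);
    for(NodeID nid : const_ids)
    {
        g.remove_node(nid); // also severs input/output connections, per the hunk above
    }
}
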
diff --git a/src/graph/GraphBuilder.cpp b/src/graph/GraphBuilder.cpp
index 4c5d30a..81a18c4 100644
--- a/src/graph/GraphBuilder.cpp
+++ b/src/graph/GraphBuilder.cpp
@@ -25,9 +25,11 @@
 
 #include "arm_compute/graph/Graph.h"
 #include "arm_compute/graph/Utils.h"
-#include "arm_compute/graph/algorithms/BFS.h"
+#include "arm_compute/graph/algorithms/TopologicalSort.h"
 #include "arm_compute/graph/nodes/Nodes.h"
 
+#include "support/ToolchainSupport.h"
+
 #define CHECK_NODEIDX_PAIR(pair, g) \
     ARM_COMPUTE_ERROR_ON(((pair).node_id >= (g).nodes().size()) || ((g).node((pair).node_id) == nullptr) || ((pair).index >= (g).node((pair).node_id)->num_outputs()));
 
@@ -79,43 +81,6 @@
 
     return nid;
 }
-
-NodeID create_grouped_convolution(Graph &g, NodeParams &params, NodeIdxPair input, NodeID weights, NodeID bias,
-                                  PadStrideInfo conv_info, ConvolutionMethod method, FastMathHint fast_math_hint, unsigned int num_groups)
-{
-    bool has_bias = (bias != EmptyNodeID);
-
-    // Split input
-    NodeID input_split = GraphBuilder::add_split_node(g, params, input, num_groups, 2);
-
-    // Split weights
-    NodeID weights_split = GraphBuilder::add_split_node(g, params, { weights, 0 }, num_groups, 3);
-
-    // Split bias
-    NodeID bias_split = EmptyNodeID;
-    if(has_bias)
-    {
-        // Split bias
-        bias_split = GraphBuilder::add_split_node(g, params, { bias, 0 }, num_groups, 0);
-    }
-
-    std::vector<NodeIdxPair> convolution_outputs;
-    for(unsigned int i = 0; i < num_groups; ++i)
-    {
-        NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, method, fast_math_hint);
-        g.add_connection(input_split, i, conv_nid, 0);
-        g.add_connection(weights_split, i, conv_nid, 1);
-        if(has_bias)
-        {
-            g.add_connection(bias_split, i, conv_nid, 2);
-        }
-        set_node_params(g, conv_nid, params);
-        convolution_outputs.push_back({ conv_nid, 0 });
-    }
-
-    // Depth concatenate output
-    return GraphBuilder::add_depth_concatenate_node(g, params, convolution_outputs);
-}
 } // namespace
 
 NodeID GraphBuilder::add_const_node(Graph &g, NodeParams params, TensorDescriptor desc, ITensorAccessorUPtr accessor)
@@ -203,6 +168,11 @@
     return batch_norm_nid;
 }
 
+NodeID GraphBuilder::add_channel_shuffle_node(Graph &g, NodeParams params, NodeIdxPair input, unsigned int num_groups)
+{
+    return create_simple_single_input_output_node<ChannelShuffleLayerNode>(g, params, input, num_groups);
+}
+
 NodeID GraphBuilder::add_convolution_node(Graph &g, NodeParams params, NodeIdxPair input,
                                           Size2D kernel_spatial_extend, unsigned int depth, PadStrideInfo conv_info,
                                           unsigned int num_groups, ConvolutionMethod method, FastMathHint fast_math_hint,
@@ -239,34 +209,81 @@
     {
         TensorDescriptor b_desc = input_tensor_desc;
         b_desc.shape            = TensorShape(depth);
-        b_nid                   = add_const_node_with_name(g, params, "Bias", b_desc, std::move(bias_accessor));
-    }
-
-    if(num_groups == 1)
-    {
-        // Create convolution node and connect
-        NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, method, fast_math_hint, out_quant_info);
-        g.add_connection(input.node_id, input.index, conv_nid, 0);
-        g.add_connection(w_nid, 0, conv_nid, 1);
-        if(has_bias)
+        if(is_data_type_quantized_asymmetric(input_tensor_desc.data_type))
         {
-            g.add_connection(b_nid, 0, conv_nid, 2);
+            b_desc.data_type = DataType::S32;
         }
-        set_node_params(g, conv_nid, params);
+        b_nid = add_const_node_with_name(g, params, "Bias", b_desc, std::move(bias_accessor));
+    }
 
-        return conv_nid;
-    }
-    else
+    // Create convolution node and connect
+    NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, num_groups, method, fast_math_hint, out_quant_info);
+    g.add_connection(input.node_id, input.index, conv_nid, 0);
+    g.add_connection(w_nid, 0, conv_nid, 1);
+    if(has_bias)
     {
-        return create_grouped_convolution(g, params, input, w_nid, b_nid, conv_info, method, fast_math_hint, num_groups);
+        g.add_connection(b_nid, 0, conv_nid, 2);
     }
+    set_node_params(g, conv_nid, params);
+
+    return conv_nid;
 }
 
-NodeID GraphBuilder::add_depth_concatenate_node(Graph &g, NodeParams params, std::vector<NodeIdxPair> inputs)
+NodeID GraphBuilder::add_deconvolution_node(Graph &g, NodeParams params, NodeIdxPair input,
+                                            Size2D kernel_spatial_extend, unsigned int depth, PadStrideInfo deconv_info,
+                                            Size2D inner_border, ITensorAccessorUPtr weights_accessor,
+                                            ITensorAccessorUPtr bias_accessor)
+{
+    CHECK_NODEIDX_PAIR(input, g);
+    ARM_COMPUTE_ERROR_ON(depth == 0);
+    ARM_COMPUTE_ERROR_ON((kernel_spatial_extend.width == 0) || (kernel_spatial_extend.height == 0));
+
+    bool has_bias = (bias_accessor != nullptr);
+
+    // Get input tensor descriptor
+    const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]);
+
+    // Create weights node
+    TensorDescriptor w_desc = input_tensor_desc;
+    w_desc.shape.set(get_dimension_idx(input_tensor_desc, DataLayoutDimension::WIDTH), kernel_spatial_extend.width);
+    w_desc.shape.set(get_dimension_idx(input_tensor_desc, DataLayoutDimension::HEIGHT), kernel_spatial_extend.height);
+    w_desc.shape.set(get_dimension_idx(input_tensor_desc, DataLayoutDimension::CHANNEL),
+                     get_dimension_size(input_tensor_desc, DataLayoutDimension::CHANNEL));
+    w_desc.shape.set(get_dimension_idx(input_tensor_desc, DataLayoutDimension::BATCHES), depth);
+
+    NodeID w_nid = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor));
+
+    // Create bias nodes
+    NodeID b_nid = EmptyNodeID;
+    if(has_bias)
+    {
+        TensorDescriptor b_desc = input_tensor_desc;
+        b_desc.shape            = TensorShape(depth);
+        if(is_data_type_quantized_asymmetric(input_tensor_desc.data_type))
+        {
+            b_desc.data_type = DataType::S32;
+        }
+        b_nid = add_const_node_with_name(g, params, "Bias", b_desc, std::move(bias_accessor));
+    }
+
+    // Create deconvolution node and connect
+    NodeID deconv_nid = g.add_node<DeconvolutionLayerNode>(deconv_info, inner_border);
+    g.add_connection(input.node_id, input.index, deconv_nid, 0);
+    g.add_connection(w_nid, 0, deconv_nid, 1);
+    if(has_bias)
+    {
+        g.add_connection(b_nid, 0, deconv_nid, 2);
+    }
+    set_node_params(g, deconv_nid, params);
+
+    return deconv_nid;
+}
+
+NodeID GraphBuilder::add_concatenate_node(Graph &g, NodeParams params, std::vector<NodeIdxPair> inputs, DataLayoutDimension axis)
 {
     ARM_COMPUTE_ERROR_ON(inputs.size() == 0);
 
-    NodeID nid = g.add_node<DepthConcatenateLayerNode>(inputs.size());
+    NodeID nid = g.add_node<ConcatenateLayerNode>(inputs.size(), axis);
 
     unsigned int i = 0;
     for(const auto &input : inputs)
@@ -309,7 +326,7 @@
     if(has_bias)
     {
         TensorDescriptor b_desc = input_tensor_desc;
-        b_desc.shape            = TensorShape(b_desc.shape.z());
+        b_desc.shape            = TensorShape(get_dimension_size(input_tensor_desc, DataLayoutDimension::CHANNEL));
         b_nid                   = add_const_node_with_name(g, params, "Bias", b_desc, std::move(bias_accessor));
     }
 
@@ -326,6 +343,11 @@
     return conv_nid;
 }
 
+NodeID GraphBuilder::add_dummy_node(Graph &g, NodeParams params, NodeIdxPair input, TensorShape shape)
+{
+    return create_simple_single_input_output_node<DummyNode>(g, params, input, shape);
+}
+
 NodeID GraphBuilder::add_elementwise_node(Graph &g, NodeParams params, NodeIdxPair input0, NodeIdxPair input1, EltwiseOperation operation)
 {
     CHECK_NODEIDX_PAIR(input0, g);
@@ -347,7 +369,9 @@
 }
 
 NodeID GraphBuilder::add_fully_connected_layer(Graph &g, NodeParams params, NodeIdxPair input, unsigned int num_outputs,
-                                               ITensorAccessorUPtr weights_accessor, ITensorAccessorUPtr bias_accessor)
+                                               ITensorAccessorUPtr weights_accessor, ITensorAccessorUPtr bias_accessor,
+                                               const FullyConnectedLayerInfo fc_info,
+                                               const QuantizationInfo weights_quant_info, const QuantizationInfo out_quant_info)
 {
     CHECK_NODEIDX_PAIR(input, g);
     ARM_COMPUTE_ERROR_ON(num_outputs == 0);
@@ -358,7 +382,7 @@
     const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]);
 
     // Create weights node
-    TensorDescriptor w_desc = FullyConnectedLayerNode::compute_weights_descriptor(input_tensor_desc, num_outputs);
+    TensorDescriptor w_desc = FullyConnectedLayerNode::compute_weights_descriptor(input_tensor_desc, num_outputs, fc_info, weights_quant_info);
     NodeID           w_nid  = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor));
 
     // Create bias nodes
@@ -367,11 +391,15 @@
     {
         TensorDescriptor b_desc = input_tensor_desc;
         b_desc.shape            = TensorShape(num_outputs);
-        b_nid                   = add_const_node_with_name(g, params, "Bias", b_desc, std::move(bias_accessor));
+        if(is_data_type_quantized_asymmetric(input_tensor_desc.data_type))
+        {
+            b_desc.data_type = DataType::S32;
+        }
+        b_nid = add_const_node_with_name(g, params, "Bias", b_desc, std::move(bias_accessor));
     }
 
-    // Create convolution node and connect
-    NodeID fc_nid = g.add_node<FullyConnectedLayerNode>(num_outputs);
+    // Create fully connected node and connect
+    NodeID fc_nid = g.add_node<FullyConnectedLayerNode>(num_outputs, out_quant_info, fc_info);
     g.add_connection(input.node_id, input.index, fc_nid, 0);
     g.add_connection(w_nid, 0, fc_nid, 1);
     if(has_bias)
@@ -389,6 +417,11 @@
     return create_simple_single_input_output_node<NormalizationLayerNode>(g, params, input, norm_info);
 }
 
+NodeID GraphBuilder::add_permute_node(Graph &g, NodeParams params, NodeIdxPair input, PermutationVector perm, DataLayout layout)
+{
+    return create_simple_single_input_output_node<PermuteLayerNode>(g, params, input, perm, layout);
+}
+
 NodeID GraphBuilder::add_pooling_node(Graph &g, NodeParams params, NodeIdxPair input, PoolingLayerInfo pool_info)
 {
     return create_simple_single_input_output_node<PoolingLayerNode>(g, params, input, pool_info);
@@ -399,6 +432,12 @@
     return create_simple_single_input_output_node<ReshapeLayerNode>(g, params, input, shape);
 }
 
+NodeID GraphBuilder::add_resize_node(Graph &g, NodeParams params, NodeIdxPair input, InterpolationPolicy policy,
+                                     float width_scale, float height_scale)
+{
+    return create_simple_single_input_output_node<ResizeLayerNode>(g, params, input, policy, width_scale, height_scale);
+}
+
 NodeID GraphBuilder::add_scale_layer(Graph &g, const NodeParams &params, NodeIdxPair input, ITensorAccessorUPtr mul_accessor, ITensorAccessorUPtr add_accessor)
 {
     CHECK_NODEIDX_PAIR(input, g);
@@ -421,9 +460,9 @@
     NodeIdxPair      add_const_nidxp = { add_const_nid, 0 };
 
     // Create node and connect
-    NodeID      mul_node      = GraphBuilder::add_elementwise_node(g, params, input, mul_const_nidxp, EltwiseOperation::MUL);
+    NodeID      mul_node      = GraphBuilder::add_elementwise_node(g, params, input, mul_const_nidxp, EltwiseOperation::Mul);
     NodeIdxPair mulnode_nidxp = { mul_node, 0 };
-    NodeID      add_node      = GraphBuilder::add_elementwise_node(g, params, mulnode_nidxp, add_const_nidxp, EltwiseOperation::ADD);
+    NodeID      add_node      = GraphBuilder::add_elementwise_node(g, params, mulnode_nidxp, add_const_nidxp, EltwiseOperation::Add);
 
     return add_node;
 }
@@ -438,4 +477,4 @@
     return create_simple_single_input_output_node<SplitLayerNode>(g, params, input, num_splits, axis);
 }
 } // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
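
Note that create_grouped_convolution() is gone from GraphBuilder: the group count now travels on the node itself (g.add_node<ConvolutionLayerNode>(conv_info, num_groups, ...)) and the split/concatenate expansion is deferred to the new GroupedConvolutionMutator registered in the Utils.cpp hunk further below. A hedged call sketch; the accessor parameters after fast_math_hint are inferred from the function body above, and the Default/Disabled enumerator casing follows the Mul/Add rename visible in this diff but is an assumption:

#include "arm_compute/graph/GraphBuilder.h"

using namespace arm_compute;
using namespace arm_compute::graph;

NodeID add_grouped_conv(Graph &g, NodeParams params, NodeID input_nid,
                        ITensorAccessorUPtr weights, ITensorAccessorUPtr bias)
{
    return GraphBuilder::add_convolution_node(
        g, params, { input_nid, 0 },
        Size2D(3, 3), /* depth */ 64,
        PadStrideInfo(1, 1, 1, 1),
        /* num_groups */ 4,         // expanded later by GroupedConvolutionMutator
        ConvolutionMethod::Default, // assumed enumerator casing
        FastMathHint::Disabled,     // assumed enumerator casing
        std::move(weights), std::move(bias));
}
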
diff --git a/src/graph/GraphContext.cpp b/src/graph/GraphContext.cpp
index 3f31114..5f33ed3 100644
--- a/src/graph/GraphContext.cpp
+++ b/src/graph/GraphContext.cpp
@@ -22,7 +22,9 @@
  * SOFTWARE.
  */
 #include "arm_compute/graph/GraphContext.h"
-#include <arm_compute/graph.h>
+
+#include "arm_compute/graph.h"
+#include "arm_compute/graph/Utils.h"
 
 namespace arm_compute
 {
@@ -33,6 +35,12 @@
 {
 }
 
+GraphContext::~GraphContext()
+{
+    _memory_managers.clear();
+    release_default_graph_context(*this);
+}
+
 const GraphConfig &GraphContext::config() const
 {
     return _config;
@@ -82,4 +90,4 @@
     }
 }
 } // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/GraphManager.cpp b/src/graph/GraphManager.cpp
index ad45845..f9d13ac 100644
--- a/src/graph/GraphManager.cpp
+++ b/src/graph/GraphManager.cpp
@@ -27,10 +27,13 @@
 #include "arm_compute/graph/GraphContext.h"
 #include "arm_compute/graph/Logger.h"
 #include "arm_compute/graph/PassManager.h"
+#include "arm_compute/graph/TypePrinter.h"
 #include "arm_compute/graph/Utils.h"
 #include "arm_compute/graph/detail/CrossLayerMemoryManagerHelpers.h"
 #include "arm_compute/graph/detail/ExecutionHelpers.h"
 
+#include "arm_compute/graph/algorithms/TopologicalSort.h"
+
 namespace arm_compute
 {
 namespace graph
@@ -38,7 +41,6 @@
 GraphManager::GraphManager()
     : _workloads()
 {
-    detail::default_initialize_backends();
 }
 
 void GraphManager::finalize_graph(Graph &graph, GraphContext &ctx, PassManager &pm, Target target)
@@ -53,7 +55,12 @@
     }
 
     // Force target to all graph construct
-    Target forced_target = is_target_supported(target) ? target : get_default_target();
+    Target forced_target = target;
+    if(!is_target_supported(target))
+    {
+        forced_target = get_default_target();
+        ARM_COMPUTE_LOG_GRAPH_INFO("Switching target from " << target << " to " << forced_target << std::endl);
+    }
     force_target_to_graph(graph, forced_target);
 
     // Configure all tensors
@@ -62,22 +69,22 @@
     // Apply all mutating passes
     pm.run_all(graph);
 
+    // Perform topological sort
+    std::vector<NodeID> topological_sorted_nodes = dfs(graph);
+
     // Validate all nodes
     detail::validate_all_nodes(graph);
 
     // Configure all nodes
-    auto workload = detail::configure_all_nodes(graph, ctx);
+    auto workload = detail::configure_all_nodes(graph, ctx, topological_sorted_nodes);
     ARM_COMPUTE_ERROR_ON_MSG(workload.tasks.empty(), "Could not configure all nodes!");
 
     // Allocate const tensors and call accessors
     detail::allocate_const_tensors(graph);
     detail::call_all_const_node_accessors(graph);
 
-    if(forced_target == Target::CL)
-    {
-        // Prepare graph
-        detail::prepare_all_tasks(workload);
-    }
+    // Prepare graph
+    detail::prepare_all_tasks(workload);
 
     // Setup tensor memory (Allocate all tensors or setup transition manager)
     if(ctx.config().use_transition_memory_manager)
@@ -95,15 +102,6 @@
     // Register graph
     _workloads.insert(std::make_pair(graph.id(), std::move(workload)));
     ARM_COMPUTE_LOG_GRAPH_VERBOSE("Created workload for graph with ID : " << graph.id().get() << std::endl);
-
-    if(forced_target != Target::CL)
-    {
-        // Make first run
-        execute_graph(graph);
-
-        // Release all unused const tensors
-        detail::release_unused_tensors(graph);
-    }
 }
 
 void GraphManager::execute_graph(Graph &graph)
@@ -112,14 +110,23 @@
     auto it = _workloads.find(graph.id());
     ARM_COMPUTE_ERROR_ON_MSG(it == std::end(_workloads), "Graph is not registered!");
 
-    // Call input accessors
-    detail::call_all_input_node_accessors(it->second);
+    while(true)
+    {
+        // Call input accessors
+        if(!detail::call_all_input_node_accessors(it->second))
+        {
+            return;
+        }
 
-    // Run graph
-    detail::call_all_tasks(it->second);
+        // Run graph
+        detail::call_all_tasks(it->second);
 
-    // Call output accessors
-    detail::call_all_output_node_accessors(it->second);
+        // Call output accessors
+        if(!detail::call_all_output_node_accessors(it->second))
+        {
+            return;
+        }
+    }
 }
 
 void GraphManager::invalidate_graph(Graph &graph)
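
execute_graph() is now a loop driven entirely by the accessors: the workload keeps running until an input or output accessor returns false. A hypothetical accessor that feeds a fixed number of batches and then stops the loop, assuming only the bool-returning access_tensor() interface visible in the Tensor.cpp hunk below:

#include "arm_compute/graph/ITensorAccessor.h"

class NumBatchesAccessor final : public arm_compute::graph::ITensorAccessor
{
public:
    explicit NumBatchesAccessor(unsigned int num_batches) : _left(num_batches) {}

    bool access_tensor(arm_compute::ITensor &tensor) override
    {
        if(_left == 0)
        {
            return false; // propagated up; GraphManager::execute_graph() stops looping
        }
        // ... fill `tensor` with the next input batch here ...
        --_left;
        return true;
    }

private:
    unsigned int _left;
};
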
diff --git a/src/graph/INode.cpp b/src/graph/INode.cpp
index cd9a46a..b0c3137 100644
--- a/src/graph/INode.cpp
+++ b/src/graph/INode.cpp
@@ -185,6 +185,11 @@
     return _outputs.size();
 }
 
+NodeParams INode::common_node_params() const
+{
+    return _common_params;
+}
+
 Target INode::requested_target() const
 {
     return _common_params.target;
diff --git a/src/graph/Tensor.cpp b/src/graph/Tensor.cpp
index 287e783..9850128 100644
--- a/src/graph/Tensor.cpp
+++ b/src/graph/Tensor.cpp
@@ -67,6 +67,11 @@
     return _accessor.get();
 }
 
+std::unique_ptr<ITensorAccessor> Tensor::extract_accessor()
+{
+    return std::move(_accessor);
+}
+
 bool Tensor::call_accessor()
 {
     // Early exit guard
@@ -85,12 +90,12 @@
     }
 
     // Call accessor
-    _accessor->access_tensor(_handle->tensor());
+    bool retval = _accessor->access_tensor(_handle->tensor());
 
     // Unmap tensor
     _handle->unmap();
 
-    return true;
+    return retval;
 }
 
 void Tensor::bind_edge(EdgeID eid)
diff --git a/src/graph/TypeLoader.cpp b/src/graph/TypeLoader.cpp
new file mode 100644
index 0000000..30a3546
--- /dev/null
+++ b/src/graph/TypeLoader.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/TypeLoader.h"
+
+#include "arm_compute/core/utils/misc/Utility.h"
+
+#include <map>
+
+namespace arm_compute
+{
+arm_compute::DataType data_type_from_name(const std::string &name)
+{
+    static const std::map<std::string, arm_compute::DataType> data_types =
+    {
+        { "f16", DataType::F16 },
+        { "f32", DataType::F32 },
+        { "qasymm8", DataType::QASYMM8 },
+    };
+
+    try
+    {
+        return data_types.at(arm_compute::utility::tolower(name));
+    }
+    catch(const std::out_of_range &)
+    {
+        throw std::invalid_argument(name);
+    }
+}
+
+arm_compute::DataLayout data_layout_from_name(const std::string &name)
+{
+    static const std::map<std::string, arm_compute::DataLayout> data_layouts =
+    {
+        { "nhwc", DataLayout::NHWC },
+        { "nchw", DataLayout::NCHW },
+    };
+
+    try
+    {
+        return data_layouts.at(arm_compute::utility::tolower(name));
+    }
+    catch(const std::out_of_range &)
+    {
+        throw std::invalid_argument(name);
+    }
+}
+namespace graph
+{
+Target target_from_name(const std::string &name)
+{
+    static const std::map<std::string, Target> targets =
+    {
+        { "neon", Target::NEON },
+        { "cl", Target::CL },
+        { "gles", Target::GC },
+    };
+
+    try
+    {
+        return targets.at(arm_compute::utility::tolower(name));
+    }
+    catch(const std::out_of_range &)
+    {
+        throw std::invalid_argument(name);
+    }
+}
+} // namespace graph
+} // namespace arm_compute
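
Usage of the new loaders is straightforward: matching is case-insensitive (names are passed through tolower) and an unknown name throws std::invalid_argument carrying the offending string, exactly as implemented above:

#include "arm_compute/graph/TypeLoader.h"

arm_compute::DataType      data_type = arm_compute::data_type_from_name("F32");
arm_compute::DataLayout    layout    = arm_compute::data_layout_from_name("nhwc");
arm_compute::graph::Target target    = arm_compute::graph::target_from_name("neon");
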
diff --git a/src/graph/Utils.cpp b/src/graph/Utils.cpp
index 030fa2d..0a85a7f 100644
--- a/src/graph/Utils.cpp
+++ b/src/graph/Utils.cpp
@@ -78,22 +78,44 @@
 {
     PassManager pm;
 
+    // Passes that mutate graph IR
+    pm.append(support::cpp14::make_unique<GroupedConvolutionMutator>());
     if(target != Target::GC)
     {
-        pm.append(support::cpp14::make_unique<InPlaceOperationMutator>());
         pm.append(support::cpp14::make_unique<NodeFusionMutator>());
-        pm.append(support::cpp14::make_unique<SplitLayerSubTensorMutator>());
-        pm.append(support::cpp14::make_unique<DepthConcatSubTensorMutator>());
+        pm.append(support::cpp14::make_unique<InPlaceOperationMutator>());
     }
 
+    // Passes that mutate backend information
+    if(target != Target::GC)
+    {
+        pm.append(support::cpp14::make_unique<DepthConcatSubTensorMutator>());
+        pm.append(support::cpp14::make_unique<SplitLayerSubTensorMutator>());
+    }
+    pm.append(support::cpp14::make_unique<NodeExecutionMethodMutator>());
+
     return pm;
 }
 
+void release_default_graph_context(GraphContext &ctx)
+{
+    for(const auto &backend : backends::BackendRegistry::get().backends())
+    {
+        if(backend.second->is_backend_supported())
+        {
+            backend.second->release_backend_context(ctx);
+        }
+    }
+}
+
 void setup_default_graph_context(GraphContext &ctx)
 {
     for(const auto &backend : backends::BackendRegistry::get().backends())
     {
-        backend.second->setup_backend_context(ctx);
+        if(backend.second->is_backend_supported())
+        {
+            backend.second->setup_backend_context(ctx);
+        }
     }
 }
 
@@ -131,5 +153,37 @@
             break;
     }
 }
+
+std::vector<NodeIdxPair> get_driving_nodes(const INode &node)
+{
+    std::vector<NodeIdxPair> driving_nodes;
+
+    const Graph *g = node.graph();
+    ARM_COMPUTE_ERROR_ON(g == nullptr);
+
+    for(auto &output_edge_id : node.output_edges())
+    {
+        auto output_edge = g->edge(output_edge_id);
+        if(output_edge != nullptr)
+        {
+            ARM_COMPUTE_ERROR_ON(output_edge->consumer() == nullptr);
+            driving_nodes.push_back({ output_edge->consumer_id(), output_edge->consumer_idx() });
+        }
+    }
+
+    return driving_nodes;
+}
+
+void configure_tensor(Tensor *tensor)
+{
+    if(tensor != nullptr && tensor->handle() == nullptr)
+    {
+        Target                         target  = tensor->desc().target;
+        backends::IDeviceBackend      &backend = backends::BackendRegistry::get().get_backend(target);
+        std::unique_ptr<ITensorHandle> handle  = backend.create_tensor(*tensor);
+        ARM_COMPUTE_ERROR_ON_MSG(!handle, "Couldn't create backend handle!");
+        tensor->set_handle(std::move(handle));
+    }
+}
 } // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
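
get_driving_nodes() returns, for every connected output edge, the consumer node together with the input port it is attached to. A small sketch using only the NodeIdxPair fields that appear elsewhere in this diff (node_id and index):

#include "arm_compute/graph/INode.h"
#include "arm_compute/graph/Utils.h"

#include <vector>

std::vector<arm_compute::graph::NodeID> consumer_ids(const arm_compute::graph::INode &node)
{
    std::vector<arm_compute::graph::NodeID> ids;
    for(const auto &driving : arm_compute::graph::get_driving_nodes(node))
    {
        ids.push_back(driving.node_id); // driving.index is the consumer's input port
    }
    return ids;
}
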
diff --git a/src/graph/algorithms/TopologicalSort.cpp b/src/graph/algorithms/TopologicalSort.cpp
new file mode 100644
index 0000000..0fbf6e3
--- /dev/null
+++ b/src/graph/algorithms/TopologicalSort.cpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/algorithms/TopologicalSort.h"
+
+#include "arm_compute/graph/Graph.h"
+
+#include "arm_compute/core/utils/misc/Iterable.h"
+
+#include <list>
+#include <stack>
+
+namespace arm_compute
+{
+namespace graph
+{
+namespace detail
+{
+/** Checks if all the input dependencies of a node have been visited
+ *
+ * @param[in] node    Node to check
+ * @param[in] visited Vector that contains the visited information
+ *
+ * @return True if all input dependencies have been visited, else false
+ */
+inline bool all_inputs_are_visited(const INode *node, const std::vector<bool> &visited)
+{
+    ARM_COMPUTE_ERROR_ON(node == nullptr);
+    const Graph *graph = node->graph();
+    ARM_COMPUTE_ERROR_ON(graph == nullptr);
+
+    bool are_all_visited = true;
+    for(const auto &input_edge_id : node->input_edges())
+    {
+        if(input_edge_id != EmptyEdgeID)
+        {
+            const Edge *input_edge = graph->edge(input_edge_id);
+            ARM_COMPUTE_ERROR_ON(input_edge == nullptr);
+            ARM_COMPUTE_ERROR_ON(input_edge->producer() == nullptr);
+            if(!visited[input_edge->producer_id()])
+            {
+                are_all_visited = false;
+                break;
+            }
+        }
+    }
+
+    return are_all_visited;
+}
+} // namespace detail
+
+std::vector<NodeID> bfs(Graph &g)
+{
+    std::vector<NodeID> bfs_order_vector;
+
+    // Create visited vector
+    std::vector<bool> visited(g.nodes().size(), false);
+
+    // Create BFS queue
+    std::list<NodeID> queue;
+
+    // Push inputs and mark as visited
+    for(auto &input : g.nodes(NodeType::Input))
+    {
+        if(input != EmptyNodeID)
+        {
+            visited[input] = true;
+            queue.push_back(input);
+        }
+    }
+
+    // Push const nodes and mark as visited
+    for(auto &const_node : g.nodes(NodeType::Const))
+    {
+        if(const_node != EmptyNodeID)
+        {
+            visited[const_node] = true;
+            queue.push_back(const_node);
+        }
+    }
+
+    // Iterate over the queue, following output edges
+    while(!queue.empty())
+    {
+        // Dequeue a node from queue and process
+        NodeID n = queue.front();
+        bfs_order_vector.push_back(n);
+        queue.pop_front();
+
+        const INode *node = g.node(n);
+        ARM_COMPUTE_ERROR_ON(node == nullptr);
+        for(const auto &eid : node->output_edges())
+        {
+            const Edge *e = g.edge(eid);
+            ARM_COMPUTE_ERROR_ON(e == nullptr);
+            if(!visited[e->consumer_id()] && detail::all_inputs_are_visited(e->consumer(), visited))
+            {
+                visited[e->consumer_id()] = true;
+                queue.push_back(e->consumer_id());
+            }
+        }
+    }
+
+    return bfs_order_vector;
+}
+
+std::vector<NodeID> dfs(Graph &g)
+{
+    std::vector<NodeID> dfs_order_vector;
+
+    // Create visited vector
+    std::vector<bool> visited(g.nodes().size(), false);
+
+    // Create DFS stack
+    std::stack<NodeID> stack;
+
+    // Push inputs and mark as visited
+    for(auto &input : g.nodes(NodeType::Input))
+    {
+        if(input != EmptyNodeID)
+        {
+            visited[input] = true;
+            stack.push(input);
+        }
+    }
+
+    // Push const nodes and mark as visited
+    for(auto &const_node : g.nodes(NodeType::Const))
+    {
+        if(const_node != EmptyNodeID)
+        {
+            visited[const_node] = true;
+            stack.push(const_node);
+        }
+    }
+
+    // Iterate over the stack, following output edges
+    while(!stack.empty())
+    {
+        // Pop a node from stack and process
+        NodeID n = stack.top();
+        dfs_order_vector.push_back(n);
+        stack.pop();
+
+        // Mark node as visited
+        if(!visited[n])
+        {
+            visited[n] = true;
+        }
+
+        const INode *node = g.node(n);
+        ARM_COMPUTE_ERROR_ON(node == nullptr);
+        // Reverse iterate to push branches from right to left and pop them in the opposite order
+        for(const auto &eid : arm_compute::utils::iterable::reverse_iterate(node->output_edges()))
+        {
+            const Edge *e = g.edge(eid);
+            ARM_COMPUTE_ERROR_ON(e == nullptr);
+            if(!visited[e->consumer_id()] && detail::all_inputs_are_visited(e->consumer(), visited))
+            {
+                stack.push(e->consumer_id());
+            }
+        }
+    }
+
+    return dfs_order_vector;
+}
+} // namespace graph
+} // namespace arm_compute
\ No newline at end of file
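
Both entry points return a schedule in which every node appears after all of its producers, which is what lets GraphManager::finalize_graph() feed the dfs() order straight into configure_all_nodes(). A sketch that walks the schedule (INode::name() is assumed from the logging calls elsewhere in this diff):

#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/algorithms/TopologicalSort.h"

#include <iostream>

void print_schedule(arm_compute::graph::Graph &g)
{
    for(arm_compute::graph::NodeID nid : arm_compute::graph::dfs(g))
    {
        const arm_compute::graph::INode *node = g.node(nid);
        if(node != nullptr)
        {
            std::cout << nid << ": " << node->name() << "\n"; // producers always precede consumers
        }
    }
}
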
diff --git a/src/graph/backends/BackendRegistry.cpp b/src/graph/backends/BackendRegistry.cpp
index 2803322..dccfefc 100644
--- a/src/graph/backends/BackendRegistry.cpp
+++ b/src/graph/backends/BackendRegistry.cpp
@@ -48,6 +48,14 @@
     return _registered_backends[target].get();
 }
 
+IDeviceBackend &BackendRegistry::get_backend(Target target)
+{
+    IDeviceBackend *backend = find_backend(target);
+    ARM_COMPUTE_ERROR_ON_MSG(!backend, "Requested backend doesn't exist!");
+    ARM_COMPUTE_ERROR_ON_MSG(!backend->is_backend_supported(), "Requested backend isn't supported");
+    return *backend;
+}
+
 bool BackendRegistry::contains(Target target) const
 {
     auto it = _registered_backends.find(target);
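
The new get_backend() is a checked lookup on top of find_backend(): it asserts that the backend exists and is supported, so call sites can hold a reference instead of testing a pointer. This is the pattern configure_tensor() in the Utils.cpp hunk above relies on; a sketch, with `tensor` standing in for a graph Tensor:

arm_compute::graph::backends::IDeviceBackend &backend =
    arm_compute::graph::backends::BackendRegistry::get().get_backend(arm_compute::graph::Target::CL);
std::unique_ptr<arm_compute::graph::ITensorHandle> handle = backend.create_tensor(tensor);
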
diff --git a/src/graph/backends/CL/CLDeviceBackend.cpp b/src/graph/backends/CL/CLDeviceBackend.cpp
index bf17f80..1dbeae9 100644
--- a/src/graph/backends/CL/CLDeviceBackend.cpp
+++ b/src/graph/backends/CL/CLDeviceBackend.cpp
@@ -62,19 +62,16 @@
 /** Register CL backend */
 static detail::BackendRegistrar<CLDeviceBackend> CLDeviceBackend_registrar(Target::CL);
 
-/** Tuner export file */
-static const std::string tuner_data_filename = "acl_tuner.csv";
-
 CLDeviceBackend::CLDeviceBackend()
-    : _tuner(), _allocator(cl::Context::getDefault())
+    : _context_count(0), _tuner(), _allocator(nullptr), _tuner_file()
 {
 }
 
 CLDeviceBackend::~CLDeviceBackend()
 {
-    if(_tuner.tune_new_kernels() && !_tuner.lws_table().empty())
+    if(_tuner.tune_new_kernels() && !_tuner.lws_table().empty() && !_tuner_file.empty())
     {
-        _tuner.save_to_file(tuner_data_filename);
+        _tuner.save_to_file(_tuner_file);
     }
 }
 
@@ -85,22 +82,40 @@
 
 void CLDeviceBackend::initialize_backend()
 {
-    // Load tuner data if available
-    if(_tuner.lws_table().empty() && file_exists(tuner_data_filename))
-    {
-        _tuner.load_from_file(tuner_data_filename);
-    }
-
     // Setup Scheduler
     CLScheduler::get().default_init(&_tuner);
 
     // Create allocator with new context
-    _allocator = CLBufferAllocator();
+    _allocator = support::cpp14::make_unique<CLBufferAllocator>();
+}
+
+void CLDeviceBackend::release_backend_context(GraphContext &ctx)
+{
+    ARM_COMPUTE_UNUSED(ctx);
+    _context_count--;
+    if(_context_count == 0) // No more context using the backend: free resources
+    {
+        _allocator = nullptr;
+    }
 }
 
 void CLDeviceBackend::setup_backend_context(GraphContext &ctx)
 {
+    // Force backend initialization
+    _context_count++;
+    if(_context_count == 1)
+    {
+        initialize_backend();
+    }
+
     // Setup tuner
+    _tuner_file = ctx.config().tuner_file;
+    // Load tuner data if available
+    if(file_exists(_tuner_file))
+    {
+        _tuner.load_from_file(_tuner_file);
+    }
+
     set_kernel_tuning(ctx.config().use_tuner);
 
     // Setup a management backend
@@ -123,7 +138,7 @@
 
 IAllocator *CLDeviceBackend::backend_allocator()
 {
-    return &_allocator;
+    return _allocator.get();
 }
 
 std::unique_ptr<ITensorHandle> CLDeviceBackend::create_tensor(const Tensor &tensor)
@@ -179,7 +194,7 @@
     auto pool_mgr     = std::make_shared<PoolManager>();
     auto mm           = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);
 
-    mm->set_allocator(&_allocator);
+    mm->set_allocator(_allocator.get());
 
     return mm;
 }
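
The tuner file is no longer the hard-coded acl_tuner.csv: it comes from the graph configuration, is loaded in setup_backend_context() if present, and is written back when the backend is destroyed. A sketch of wiring it up; the use_tuner/tuner_file fields are read above, while GraphContext::set_config() is an assumption (only config() appears in this diff):

arm_compute::graph::GraphConfig config{};
config.use_tuner  = true;
config.tuner_file = "my_network_tuner.csv"; // loaded if it exists, saved on backend teardown

arm_compute::graph::GraphContext ctx{};
ctx.set_config(config); // assumed setter
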
diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp
index db8a7a0..bf3dcba 100644
--- a/src/graph/backends/CL/CLFunctionsFactory.cpp
+++ b/src/graph/backends/CL/CLFunctionsFactory.cpp
@@ -25,16 +25,9 @@
 
 #include "arm_compute/core/utils/misc/Cast.h"
 #include "arm_compute/graph/Graph.h"
-#include "arm_compute/graph/GraphContext.h"
-#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/TypePrinter.h"
-#include "arm_compute/graph/Types.h"
-#include "arm_compute/graph/backends/Utils.h"
-#include "arm_compute/graph/nodes/Nodes.h"
+#include "arm_compute/graph/backends/FunctionHelpers.h"
 #include "arm_compute/runtime/CL/CLFunctions.h"
 
-#include "support/ToolchainSupport.h"
-
 using namespace arm_compute::utils::cast;
 
 namespace arm_compute
@@ -43,526 +36,38 @@
 {
 namespace backends
 {
-namespace
+/** Target specific information structure used to pass information to the layer templates */
+struct CLTargetInfo
 {
-/** Returns backing tensor of a given tensor
- *
- * @param[in] tensor Tensor to extract the backing tensor from
- *
- * @return Backing tensor if present else nullptr
- */
-arm_compute::ICLTensor *get_backing_tensor(arm_compute::graph::Tensor *tensor)
+    using TensorType = arm_compute::ICLTensor;
+    static Target TargetType;
+};
+
+Target CLTargetInfo::TargetType = Target::CL;
+
+/** Collection of CL convolution functions */
+struct CLConvolutionLayerFunctions
 {
-    arm_compute::ICLTensor *backing_tensor = nullptr;
-    if(tensor != nullptr)
-    {
-        ARM_COMPUTE_ERROR_ON(tensor->desc().target != arm_compute::graph::Target::CL);
-        // Get backing tensor handle
-        ITensorHandle *tensor_handle = tensor->handle();
-        // Get backing tensor
-        backing_tensor = (tensor_handle != nullptr) ? polymorphic_cast<ICLTensor *>(&tensor_handle->tensor()) : nullptr;
-    }
+    using GenericConvolutionLayer  = CLConvolutionLayer;
+    using GEMMConvolutionLayer     = CLGEMMConvolutionLayer;
+    using DirectConvolutionLayer   = CLDirectConvolutionLayer;
+    using WinogradConvolutionLayer = CLWinogradConvolutionLayer;
+};
 
-    return backing_tensor;
-}
-
-/** Create a backend activation layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend activation layer function
- */
-std::unique_ptr<IFunction> create_activation_layer(ActivationLayerNode &node)
+/** Collection of CL depthwise convolution functions */
+struct CLDepthwiseConvolutionLayerFunctions
 {
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE(
-        "Creating CL ActivationLayerNode node with ID : " << node.id() << " and Name: " << node.name()
-        << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
+    using GenericDepthwiseConvolutionLayer = CLDepthwiseConvolutionLayer;
+    using DepthwiseConvolutionLayer3x3     = CLDepthwiseConvolutionLayer3x3;
+};
 
-    // Extract IO and info
-    ICLTensor                *input    = get_backing_tensor(node.input(0));
-    ICLTensor                *output   = get_backing_tensor(node.output(0));
-    const ActivationLayerInfo act_info = node.activation_info();
-
-    // Create function
-    auto func = support::cpp14::make_unique<CLActivationLayer>();
-    func->configure(input, output, act_info);
-
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated CLActivationLayer"
-                               << " Data Type: " << input->info()->data_type()
-                               << " Shape: " << input->info()->tensor_shape()
-                               << " Activation function: " << act_info.activation()
-                               << " a: " << act_info.a()
-                               << " b: " << act_info.b()
-                               << " InPlace : " << is_in_place_operation(input, output)
-                               << std::endl);
-
-    return std::move(func);
-}
-
-/** Create a backend batch normalization layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend batch normalization layer function
- */
-std::unique_ptr<IFunction> create_batch_normalization_layer(BatchNormalizationLayerNode &node)
+/** Collection of CL element-wise functions */
+struct CLEltwiseFunctions
 {
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating CL BatchNormalization node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 5);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    ICLTensor                *input     = get_backing_tensor(node.input(0));
-    ICLTensor                *mean      = get_backing_tensor(node.input(1));
-    ICLTensor                *var       = get_backing_tensor(node.input(2));
-    ICLTensor                *beta      = get_backing_tensor(node.input(3));
-    ICLTensor                *gamma     = get_backing_tensor(node.input(4));
-    ICLTensor                *output    = get_backing_tensor(node.output(0));
-    const float               epsilon   = node.epsilon();
-    const ActivationLayerInfo fused_act = node.fused_activation();
-
-    // Create and configure function
-    auto func = support::cpp14::make_unique<CLBatchNormalizationLayer>();
-    func->configure(input, output, mean, var, beta, gamma, epsilon, fused_act);
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated CLBatchNormalizationLayer"
-                               << " Data Type: " << input->info()->data_type()
-                               << " Shape: " << input->info()->tensor_shape()
-                               << " Epsilon: " << epsilon << " "
-                               << (fused_act.enabled() ? to_string(fused_act.activation()) : "")
-                               << " InPlace : " << is_in_place_operation(input, output)
-                               << std::endl);
-
-    return std::move(func);
-}
-
-/** Create a backend convolution layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend convolution layer function
- */
-std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node, GraphContext &ctx)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating CL ConvolutionLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 3);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    ICLTensor *input   = get_backing_tensor(node.input(0));
-    ICLTensor *weights = get_backing_tensor(node.input(1));
-    ICLTensor *biases  = get_backing_tensor(node.input(2));
-    ICLTensor *output  = get_backing_tensor(node.output(0));
-
-    if(is_data_type_quantized_asymmetric(input->info()->data_type()))
-    {
-        biases->info()->set_data_type(DataType::S32);
-    }
-
-    const PadStrideInfo     conv_info      = node.convolution_info();
-    const ConvolutionMethod conv_algorithm = node.convolution_method();
-    const bool              fast_math      = node.fast_math_hint() == FastMathHint::ENABLED;
-
-    // Create and configure function (we assume that functions have been validated before creation)
-    std::shared_ptr<IMemoryManager> mm = get_memory_manager(ctx, Target::CL);
-    std::unique_ptr<IFunction>      func;
-    std::string                     func_name;
-
-    if(conv_algorithm == ConvolutionMethod::WINOGRAD)
-    {
-        std::tie(func, func_name) = create_named_memory_managed_function<CLWinogradConvolutionLayer>(
-                                        std::string("CLWinogradConvolutionLayer"), mm, input, weights, biases, output, conv_info, ActivationLayerInfo(), fast_math);
-    }
-    else if(conv_algorithm == ConvolutionMethod::DIRECT)
-    {
-        std::tie(func, func_name) = create_named_function<CLDirectConvolutionLayer>(
-                                        std::string("CLDirectConvolutionLayer"), input, weights, biases, output, conv_info);
-    }
-    else if(conv_algorithm == ConvolutionMethod::GEMM)
-    {
-        std::tie(func, func_name) = create_named_memory_managed_function<CLGEMMConvolutionLayer>(std::string("CLGEMMConvolutionLayer"), mm,
-                                                                                                 input, weights, biases, output, conv_info);
-    }
-    else
-    {
-        std::tie(func, func_name) = create_named_memory_managed_function<CLConvolutionLayer>(std::string("CLConvolutionLayer"), mm,
-                                                                                             input, weights, biases, output, conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), fast_math);
-    }
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
-                               << " Data Type: " << input->info()->data_type()
-                               << " Input QuantInfo: " << input->info()->quantization_info()
-                               << " Weights QuantInfo: " << weights->info()->quantization_info()
-                               << " Input shape: " << input->info()->tensor_shape()
-                               << " Weights shape: " << weights->info()->tensor_shape()
-                               << " Output shape: " << output->info()->tensor_shape()
-                               << std::endl);
-    return func;
-}
-
-/** Create a backend layer depth concatenate function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend depth concatenate layer function
- */
-std::unique_ptr<arm_compute::IFunction> create_depth_concatenate_layer(DepthConcatenateLayerNode &node)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating CL DepthConcatenate node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Return nullptr if depth concatenate is switched off
-    if(!node.is_enabled())
-    {
-        return nullptr;
-    }
-
-    // Extract IO and info
-    std::vector<arm_compute::ICLTensor *> inputs;
-    for(unsigned int i = 0; i < node.num_inputs(); ++i)
-    {
-        inputs.push_back(get_backing_tensor(node.input(i)));
-    }
-    ICLTensor *output = get_backing_tensor(node.output(0));
-
-    // Create and configure function
-    auto func = support::cpp14::make_unique<CLDepthConcatenateLayer>();
-    func->configure(inputs, output);
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated CLDepthConcatenateLayer"
-                               << " Data Type: " << output->info()->data_type()
-                               << " Shape: " << output->info()->tensor_shape()
-                               << " Num Inputs: " << inputs.size()
-                               << std::endl);
-
-    return std::move(func);
-}
-
-/** Create a backend layer depth-wise convolution function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend depth-wise convolution layer function
- */
-std::unique_ptr<IFunction> create_depthwise_convolution_layer(DepthwiseConvolutionLayerNode &node)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE(
-        "Creating CL DepthwiseConvolutionLayer node with ID : " << node.id() << " and Name: " << node.name()
-        << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 3);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    ICLTensor *input   = get_backing_tensor(node.input(0));
-    ICLTensor *weights = get_backing_tensor(node.input(1));
-    ICLTensor *biases  = get_backing_tensor(node.input(2));
-    ICLTensor *output  = get_backing_tensor(node.output(0));
-
-    if(is_data_type_quantized_asymmetric(input->info()->data_type()))
-    {
-        biases->info()->set_data_type(DataType::S32);
-    }
-
-    const PadStrideInfo              conv_info     = node.convolution_info();
-    const DepthwiseConvolutionMethod dwc_algorithm = node.depthwise_convolution_method();
-
-    // Create and configure function (we assume that functions have been validated before creation)
-    std::unique_ptr<IFunction> func;
-    std::string                func_name;
-    if(dwc_algorithm == DepthwiseConvolutionMethod::OPTIMIZED_3x3)
-    {
-        std::tie(func, func_name) = create_named_function<CLDepthwiseConvolutionLayer3x3>(
-                                        std::string("CLDepthwiseConvolutionLayer3x3"), input, weights, biases, output, conv_info);
-    }
-    else
-    {
-        std::tie(func, func_name) = create_named_function<CLDepthwiseConvolutionLayer>(
-                                        std::string("CLDepthwiseConvolutionLayer"), input, weights, biases, output, conv_info);
-    }
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
-                               << " Data Type: " << input->info()->data_type()
-                               << " Input QuantInfo: " << input->info()->quantization_info()
-                               << " Weights QuantInfo: " << weights->info()->quantization_info()
-                               << " Input shape: " << input->info()->tensor_shape()
-                               << " Weights shape: " << weights->info()->tensor_shape()
-                               << " Output shape: " << output->info()->tensor_shape()
-                               << std::endl);
-    return func;
-}
-
-/** Create a backend element-wise operation layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend element-wise operation layer function
- */
-std::unique_ptr<IFunction> create_eltwise_layer(EltwiseLayerNode &node)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE(
-        "Creating CL EltwiseLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 2);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    ICLTensor             *input1         = get_backing_tensor(node.input(0));
-    ICLTensor             *input2         = get_backing_tensor(node.input(1));
-    ICLTensor             *output         = get_backing_tensor(node.output(0));
-    const EltwiseOperation eltwise_op     = node.eltwise_operation();
-    const ConvertPolicy    convert_policy = node.convert_policy();
-    ARM_COMPUTE_ERROR_ON(input1 == nullptr);
-    ARM_COMPUTE_ERROR_ON(input2 == nullptr);
-    ARM_COMPUTE_ERROR_ON(output == nullptr);
-
-    std::unique_ptr<IFunction> func = nullptr;
-    std::string                func_name;
-    if(eltwise_op == EltwiseOperation::ADD)
-    {
-        std::tie(func, func_name) = create_named_function<CLArithmeticAddition>(std::string("CLArithmeticAddition"),
-                                                                                input1, input2, output,
-                                                                                convert_policy);
-    }
-    else if(eltwise_op == EltwiseOperation::SUB)
-    {
-        std::tie(func, func_name) = create_named_function<CLArithmeticSubtraction>(
-                                        std::string("CLArithmeticSubtraction"), input1, input2, output, convert_policy);
-    }
-    else if(eltwise_op == EltwiseOperation::MUL)
-    {
-        std::tie(func, func_name) = create_named_function<CLPixelWiseMultiplication>(
-                                        std::string("CLPixelWiseMultiplication"), input1, input2, output, 1.f, convert_policy,
-                                        node.rounding_policy());
-    }
-    else
-    {
-        ARM_COMPUTE_ERROR("Unsupported element-wise operation!");
-    }
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
-                               << " Data Type: " << input1->info()->data_type()
-                               << " Shape : " << input1->info()->tensor_shape()
-                               << std::endl);
-
-    return func;
-}
-
-/** Create a backend flatten layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend flatten layer function
- */
-std::unique_ptr<IFunction> create_flatten_layer(FlattenLayerNode &node)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE(
-        "Creating CL FlattenLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    ICLTensor *input  = get_backing_tensor(node.input(0));
-    ICLTensor *output = get_backing_tensor(node.output(0));
-
-    // Create and configure function
-    auto func = support::cpp14::make_unique<CLFlattenLayer>();
-    func->configure(input, output);
-    ARM_COMPUTE_ERROR_ON(input == nullptr);
-    ARM_COMPUTE_ERROR_ON(output == nullptr);
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated CLFlattenLayer"
-                               << " Data Type: " << input->info()->data_type()
-                               << " Input shape: " << input->info()->tensor_shape()
-                               << " Output shape: " << output->info()->tensor_shape()
-                               << std::endl);
-
-    return std::move(func);
-}
-
-/** Create a backend fully connected layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend fully connected layer function
- */
-std::unique_ptr<IFunction> create_fully_connected_layer(FullyConnectedLayerNode &node, GraphContext &ctx)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE(
-        "Creating CL FullyConnectedLayer node with ID : " << node.id() << " and Name: " << node.name()
-        << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 3);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    ICLTensor *input   = get_backing_tensor(node.input(0));
-    ICLTensor *weights = get_backing_tensor(node.input(1));
-    ICLTensor *biases  = get_backing_tensor(node.input(2));
-    ICLTensor *output  = get_backing_tensor(node.output(0));
-
-    // Create and configure function
-    auto func = support::cpp14::make_unique<CLFullyConnectedLayer>(get_memory_manager(ctx, Target::CL));
-    func->configure(input, weights, biases, output);
-    ARM_COMPUTE_ERROR_ON(input == nullptr);
-    ARM_COMPUTE_ERROR_ON(weights == nullptr);
-    ARM_COMPUTE_ERROR_ON(output == nullptr);
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated CLFullyConnectedLayer"
-                               << " Data Type: " << input->info()->data_type()
-                               << " Input shape: " << input->info()->tensor_shape()
-                               << " Weights shape: " << weights->info()->tensor_shape()
-                               << " Biases Shape: " << biases->info()->tensor_shape()
-                               << " Output shape: " << output->info()->tensor_shape()
-                               << std::endl);
-
-    return std::move(func);
-}
-
-/** Create a backend normalization layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend normalization layer function
- */
-std::unique_ptr<IFunction> create_normalization_layer(NormalizationLayerNode &node)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE(
-        "Creating CL NormalizationLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    ICLTensor                   *input     = get_backing_tensor(node.input(0));
-    ICLTensor                   *output    = get_backing_tensor(node.output(0));
-    const NormalizationLayerInfo norm_info = node.normalization_info();
-    ARM_COMPUTE_ERROR_ON(input == nullptr);
-    ARM_COMPUTE_ERROR_ON(output == nullptr);
-
-    // Create and configure function
-    auto func = support::cpp14::make_unique<CLNormalizationLayer>();
-    func->configure(input, output, norm_info);
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated CLNormalizationLayer"
-                               << " Data Type: " << input->info()->data_type()
-                               << " Input shape: " << input->info()->tensor_shape()
-                               << " Output shape: " << output->info()->tensor_shape()
-                               << " Normalization info: " << norm_info.type()
-                               << std::endl);
-
-    return std::move(func);
-}
-
-/** Create a backend pooling layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend pooling layer function
- */
-std::unique_ptr<IFunction> create_pooling_layer(PoolingLayerNode &node)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE(
-        "Creating CL PoolingLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    ICLTensor             *input     = get_backing_tensor(node.input(0));
-    ICLTensor             *output    = get_backing_tensor(node.output(0));
-    const PoolingLayerInfo pool_info = node.pooling_info();
-    ARM_COMPUTE_ERROR_ON(input == nullptr);
-    ARM_COMPUTE_ERROR_ON(output == nullptr);
-
-    // Create and configure function
-    auto func = support::cpp14::make_unique<CLPoolingLayer>();
-    func->configure(input, output, pool_info);
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated CLPoolingLayer"
-                               << " Data Type: " << input->info()->data_type()
-                               << " Input shape: " << input->info()->tensor_shape()
-                               << " Output shape: " << output->info()->tensor_shape()
-                               << " Pooling info: " << pool_info.pool_type()
-                               << std::endl);
-
-    return std::move(func);
-}
-
-/** Create a backend reshape layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend reshape layer function
- */
-std::unique_ptr<IFunction> create_reshape_layer(ReshapeLayerNode &node)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE(
-        "Creating CL ReshapeLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    ICLTensor *input  = get_backing_tensor(node.input(0));
-    ICLTensor *output = get_backing_tensor(node.output(0));
-    ARM_COMPUTE_ERROR_ON(input == nullptr);
-    ARM_COMPUTE_ERROR_ON(output == nullptr);
-
-    // Create and configure function
-    auto func = support::cpp14::make_unique<CLReshapeLayer>();
-    func->configure(input, output);
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated CLReshapeLayer"
-                               << " Data Type: " << input->info()->data_type()
-                               << " Input shape: " << input->info()->tensor_shape()
-                               << " Output shape: " << output->info()->tensor_shape()
-                               << std::endl);
-
-    return std::move(func);
-}
-
-/** Create a backend softmax layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend softmax layer function
- */
-std::unique_ptr<IFunction> create_softmax_layer(SoftmaxLayerNode &node, GraphContext &ctx)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE(
-        "Creating CL SoftmaxLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    ICLTensor *input  = get_backing_tensor(node.input(0));
-    ICLTensor *output = get_backing_tensor(node.output(0));
-    const float beta   = node.beta();
-    ARM_COMPUTE_ERROR_ON(input == nullptr);
-    ARM_COMPUTE_ERROR_ON(output == nullptr);
-
-    // Create and configure function
-    auto func = support::cpp14::make_unique<CLSoftmaxLayer>(get_memory_manager(ctx, Target::CL));
-    func->configure(input, output, beta);
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated CLSoftmaxLayer"
-                               << " Data Type: " << input->info()->data_type()
-                               << " Input shape: " << input->info()->tensor_shape()
-                               << " Output shape: " << output->info()->tensor_shape()
-                               << std::endl);
-
-    return std::move(func);
-}
-} // namespace
+    using Addition       = CLArithmeticAddition;
+    using Subtraction    = CLArithmeticSubtraction;
+    using Multiplication = CLPixelWiseMultiplication;
+};
 
 std::unique_ptr<IFunction> CLFunctionFactory::create(INode *node, GraphContext &ctx)
 {
@@ -575,33 +80,41 @@
     switch(type)
     {
         case NodeType::ActivationLayer:
-            return create_activation_layer(*polymorphic_downcast<ActivationLayerNode *>(node));
+            return detail::create_activation_layer<CLActivationLayer, CLTargetInfo>(*polymorphic_downcast<ActivationLayerNode *>(node));
         case NodeType::BatchNormalizationLayer:
-            return create_batch_normalization_layer(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
+            return detail::create_batch_normalization_layer<CLBatchNormalizationLayer, CLTargetInfo>(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
+        case NodeType::ChannelShuffleLayer:
+            return detail::create_channel_shuffle_layer<CLChannelShuffleLayer, CLTargetInfo>(*polymorphic_downcast<ChannelShuffleLayerNode *>(node));
         case NodeType::ConvolutionLayer:
-            return create_convolution_layer(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
-        case NodeType::DepthConcatenateLayer:
-            return create_depth_concatenate_layer(*polymorphic_downcast<DepthConcatenateLayerNode *>(node));
+            return detail::create_convolution_layer<CLConvolutionLayerFunctions, CLTargetInfo>(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
+        case NodeType::DeconvolutionLayer:
+            return detail::create_deconvolution_layer<CLDeconvolutionLayer, CLTargetInfo>(*polymorphic_downcast<DeconvolutionLayerNode *>(node), ctx);
+        case NodeType::ConcatenateLayer:
+            return detail::create_concatenate_layer<CLConcatenateLayer, CLTargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
         case NodeType::DepthwiseConvolutionLayer:
-            return create_depthwise_convolution_layer(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+            return detail::create_depthwise_convolution_layer<CLDepthwiseConvolutionLayerFunctions, CLTargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
         case NodeType::EltwiseLayer:
-            return create_eltwise_layer(*polymorphic_downcast<EltwiseLayerNode *>(node));
+            return detail::create_eltwise_layer<CLEltwiseFunctions, CLTargetInfo>(*polymorphic_downcast<EltwiseLayerNode *>(node));
         case NodeType::FlattenLayer:
-            return create_flatten_layer(*polymorphic_downcast<FlattenLayerNode *>(node));
+            return detail::create_flatten_layer<CLFlattenLayer, CLTargetInfo>(*polymorphic_downcast<FlattenLayerNode *>(node));
         case NodeType::FullyConnectedLayer:
-            return create_fully_connected_layer(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
+            return detail::create_fully_connected_layer<CLFullyConnectedLayer, CLTargetInfo>(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
         case NodeType::NormalizationLayer:
-            return create_normalization_layer(*polymorphic_downcast<NormalizationLayerNode *>(node));
+            return detail::create_normalization_layer<CLNormalizationLayer, CLTargetInfo>(*polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
+        case NodeType::PermuteLayer:
+            return detail::create_permute_layer<CLPermute, CLTargetInfo>(*polymorphic_downcast<PermuteLayerNode *>(node));
         case NodeType::PoolingLayer:
-            return create_pooling_layer(*polymorphic_downcast<PoolingLayerNode *>(node));
+            return detail::create_pooling_layer<CLPoolingLayer, CLTargetInfo>(*polymorphic_downcast<PoolingLayerNode *>(node));
         case NodeType::ReshapeLayer:
-            return create_reshape_layer(*polymorphic_downcast<ReshapeLayerNode *>(node));
+            return detail::create_reshape_layer<CLReshapeLayer, CLTargetInfo>(*polymorphic_downcast<ReshapeLayerNode *>(node));
+        case NodeType::ResizeLayer:
+            return detail::create_resize_layer<CLScale, CLTargetInfo>(*polymorphic_downcast<ResizeLayerNode *>(node));
         case NodeType::SoftmaxLayer:
-            return create_softmax_layer(*polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
+            return detail::create_softmax_layer<CLSoftmaxLayer, CLTargetInfo>(*polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
         default:
             return nullptr;
     }
 }
 } // namespace backends
 } // namespace graph
-} // namespace arm_compute
+} // namespace arm_compute
\ No newline at end of file
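
The CL factory above no longer carries one hand-written creator per layer; every case now forwards to a shared template in arm_compute/graph/backends/FunctionHelpers.h, parameterized on the concrete backend function and a TargetInfo trait (CLTargetInfo supplies the tensor type and target enum). As a minimal sketch of what such a helper looks like (modeled on the GC specializations later in this patch, not quoted from FunctionHelpers.h), the pooling case reduces to:

template <typename PoolingLayerFunction, typename TargetInfo>
std::unique_ptr<IFunction> create_pooling_layer(PoolingLayerNode &node)
{
    validate_node<TargetInfo>(node, 1 /* expected inputs */, 1 /* expected outputs */);

    // Extract IO and info
    typename TargetInfo::TensorType *input     = get_backing_tensor<TargetInfo>(node.input(0));
    typename TargetInfo::TensorType *output    = get_backing_tensor<TargetInfo>(node.output(0));
    const PoolingLayerInfo           pool_info = node.pooling_info();

    // Create and configure function
    auto func = support::cpp14::make_unique<PoolingLayerFunction>();
    func->configure(input, output, pool_info);

    return std::move(func);
}

The factory then instantiates it as detail::create_pooling_layer<CLPoolingLayer, CLTargetInfo>(...), and the GLES factory below reuses the same template with GCPoolingLayer and GCTargetInfo.
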
diff --git a/src/graph/backends/CL/CLNodeValidator.cpp b/src/graph/backends/CL/CLNodeValidator.cpp
index c16b2e6..ba5b59d 100644
--- a/src/graph/backends/CL/CLNodeValidator.cpp
+++ b/src/graph/backends/CL/CLNodeValidator.cpp
@@ -47,6 +47,8 @@
     NodeType type = node->type();
     switch(type)
     {
+        case NodeType::ChannelShuffleLayer:
+            return detail::validate_channel_shuffle_layer<CLChannelShuffleLayer>(*polymorphic_downcast<ChannelShuffleLayerNode *>(node));
         case NodeType::ConvolutionLayer:
             return detail::validate_convolution_layer<CLConvolutionLayer,
                    CLDirectConvolutionLayer,
@@ -55,6 +57,8 @@
         case NodeType::DepthwiseConvolutionLayer:
             return detail::validate_depthwise_convolution_layer<CLDepthwiseConvolutionLayer,
                    CLDepthwiseConvolutionLayer3x3>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+        case NodeType::PermuteLayer:
+            return detail::validate_permute_layer<CLPermute>(*polymorphic_downcast<PermuteLayerNode *>(node));
         default:
             return Status{};
     }
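
The validators were reworked along the same lines: each detail::validate_* helper pulls the node's tensor infos and defers to the backend function's static validate(). A plausible sketch for the channel-shuffle case added above, assuming the ValidateHelpers.h convention (the helper name and get_backing_tensor_info follow that convention and are not quoted from the header):

template <typename ChannelShuffleLayer>
Status validate_channel_shuffle_layer(ChannelShuffleLayerNode &node)
{
    // Extract IO and info (tensor infos only; no function is configured here)
    arm_compute::ITensorInfo *input      = get_backing_tensor_info(node.input(0));
    arm_compute::ITensorInfo *output     = get_backing_tensor_info(node.output(0));
    const unsigned int        num_groups = node.num_groups();

    // Forward to the backend function's static validator
    return ChannelShuffleLayer::validate(input, output, num_groups);
}
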
diff --git a/src/graph/backends/GLES/GCDeviceBackend.cpp b/src/graph/backends/GLES/GCDeviceBackend.cpp
index 770cca5..ec3cf4f 100644
--- a/src/graph/backends/GLES/GCDeviceBackend.cpp
+++ b/src/graph/backends/GLES/GCDeviceBackend.cpp
@@ -53,7 +53,7 @@
 static detail::BackendRegistrar<GCDeviceBackend> GCDeviceBackend_registrar(Target::GC);
 
 GCDeviceBackend::GCDeviceBackend()
-    : _allocator()
+    : _initialized(false), _allocator()
 {
 }
 
@@ -63,8 +63,21 @@
     GCScheduler::get().default_init();
 }
 
+void GCDeviceBackend::release_backend_context(GraphContext &ctx)
+{
+    // Nothing to do
+    ARM_COMPUTE_UNUSED(ctx);
+}
+
 void GCDeviceBackend::setup_backend_context(GraphContext &ctx)
 {
+    // Force backend initialization
+    if(!_initialized)
+    {
+        initialize_backend();
+        _initialized = true;
+    }
+
     // Setup a management backend
     if(ctx.memory_management_ctx(Target::GC) == nullptr)
     {
@@ -144,4 +157,4 @@
 }
 } // namespace backends
 } // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
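
Two lifecycle changes recur across the backends in this patch: a release_backend_context() hook now mirrors setup_backend_context(), and the GC backend defers GCScheduler initialization until a context is first set up (presumably because the GLES runtime cannot be initialized at static-registration time). The implied interface, sketched under the assumption that IDeviceBackend declares both hooks as pure virtuals:

// Assumed shape of the IDeviceBackend lifecycle hooks (a sketch, not the actual header)
class IDeviceBackend
{
public:
    virtual ~IDeviceBackend() = default;
    // One-off device/runtime initialization
    virtual void initialize_backend() = 0;
    // Per-graph-context setup (memory managers, etc.)
    virtual void setup_backend_context(GraphContext &ctx) = 0;
    // Teardown counterpart introduced by this patch
    virtual void release_backend_context(GraphContext &ctx) = 0;
};
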
diff --git a/src/graph/backends/GLES/GCFunctionsFactory.cpp b/src/graph/backends/GLES/GCFunctionsFactory.cpp
index e61e840..f72513c 100644
--- a/src/graph/backends/GLES/GCFunctionsFactory.cpp
+++ b/src/graph/backends/GLES/GCFunctionsFactory.cpp
@@ -25,16 +25,9 @@
 
 #include "arm_compute/core/utils/misc/Cast.h"
 #include "arm_compute/graph/Graph.h"
-#include "arm_compute/graph/GraphContext.h"
-#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/TypePrinter.h"
-#include "arm_compute/graph/Types.h"
-#include "arm_compute/graph/backends/Utils.h"
-#include "arm_compute/graph/nodes/Nodes.h"
+#include "arm_compute/graph/backends/FunctionHelpers.h"
 #include "arm_compute/runtime/GLES_COMPUTE/GCFunctions.h"
 
-#include "support/ToolchainSupport.h"
-
 using namespace arm_compute::utils::cast;
 
 namespace arm_compute
@@ -43,120 +36,84 @@
 {
 namespace backends
 {
-namespace
+/** Target-specific information structure used to pass information to the layer templates */
+struct GCTargetInfo
 {
-/** Returns backing tensor of a given tensor
- *
- * @param[in] tensor Tensor to extract the backing tensor from
- *
- * @return Backing tensor if present else nullptr
- */
-arm_compute::IGCTensor *get_backing_tensor(arm_compute::graph::Tensor *tensor)
+    using TensorType = arm_compute::IGCTensor;
+    static Target TargetType;
+};
+
+Target GCTargetInfo::TargetType = Target::GC;
+
+/** Collection of GC convolution functions */
+struct GCConvolutionLayerFunctions
 {
-    arm_compute::IGCTensor *backing_tensor = nullptr;
-    if(tensor != nullptr)
+    using GenericConvolutionLayer = GCConvolutionLayer;
+    using GEMMConvolutionLayer    = GCConvolutionLayer;
+    using DirectConvolutionLayer  = GCDirectConvolutionLayer;
+};
+
+/** Collection of GC depthwise convolution functions */
+struct GCDepthwiseConvolutionLayerFunctions
+{
+    using DepthwiseConvolutionLayer3x3 = GCDepthwiseConvolutionLayer3x3;
+};
+
+/** Collection of GC element-wise functions */
+struct GCEltwiseFunctions
+{
+    using Addition       = GCArithmeticAddition;
+    using Multiplication = GCPixelWiseMultiplication;
+};
+
+namespace detail
+{
+// Specialized functions
+template <>
+std::unique_ptr<IFunction> create_concatenate_layer<GCDepthConcatenateLayer, GCTargetInfo>(ConcatenateLayerNode &node)
+{
+    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating Concatenate node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
+    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
+
+    // Return nullptr if depth concatenate is switched off
+    if(!node.is_enabled())
     {
-        ARM_COMPUTE_ERROR_ON(tensor->desc().target != arm_compute::graph::Target::GC);
-        // Get backing tensor handle
-        ITensorHandle *tensor_handle = tensor->handle();
-        // Get backing tensor
-        backing_tensor = (tensor_handle != nullptr) ? polymorphic_cast<IGCTensor *>(&tensor_handle->tensor()) : nullptr;
+        return nullptr;
     }
 
-    return backing_tensor;
-}
-
-/** Create a backend activation layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend activation layer function
- */
-std::unique_ptr<IFunction> create_activation_layer(ActivationLayerNode &node)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE(
-        "Creating GC ActivationLayerNode node with ID : " << node.id() << " and Name: " << node.name()
-        << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
     // Extract IO and info
-    IGCTensor                *input    = get_backing_tensor(node.input(0));
-    IGCTensor                *output   = get_backing_tensor(node.output(0));
-    const ActivationLayerInfo act_info = node.activation_info();
-
-    // Create function
-    auto func = support::cpp14::make_unique<GCActivationLayer>();
-    func->configure(input, output, act_info);
-
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated GCActivationLayer"
-                               << " Data Type: " << input->info()->data_type()
-                               << " Shape: " << input->info()->tensor_shape()
-                               << " Activation function: " << act_info.activation()
-                               << " a: " << act_info.a()
-                               << " b: " << act_info.b()
-                               << " InPlace : " << is_in_place_operation(input, output)
-                               << std::endl);
-
-    return std::move(func);
-}
-
-/** Create a backend batch normalization layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend batch normalization layer function
- */
-std::unique_ptr<IFunction> create_batch_normalization_layer(BatchNormalizationLayerNode &node)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating GC BatchNormalization node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 5);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    IGCTensor                *input     = get_backing_tensor(node.input(0));
-    IGCTensor                *mean      = get_backing_tensor(node.input(1));
-    IGCTensor                *var       = get_backing_tensor(node.input(2));
-    IGCTensor                *beta      = get_backing_tensor(node.input(3));
-    IGCTensor                *gamma     = get_backing_tensor(node.input(4));
-    IGCTensor                *output    = get_backing_tensor(node.output(0));
-    const float               epsilon   = node.epsilon();
-    const ActivationLayerInfo fused_act = node.fused_activation();
+    std::vector<GCTargetInfo::TensorType *> inputs;
+    for(unsigned int i = 0; i < node.num_inputs(); ++i)
+    {
+        inputs.push_back(get_backing_tensor<GCTargetInfo>(node.input(i)));
+    }
+    typename GCTargetInfo::TensorType *output = get_backing_tensor<GCTargetInfo>(node.output(0));
 
     // Create and configure function
-    auto func = support::cpp14::make_unique<GCBatchNormalizationLayer>();
-    func->configure(input, output, mean, var, beta, gamma, epsilon, fused_act);
+    auto func = support::cpp14::make_unique<GCDepthConcatenateLayer>();
+    func->configure(inputs, output);
 
     // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated GCBatchNormalizationLayer"
-                               << " Data Type: " << input->info()->data_type()
-                               << " Shape: " << input->info()->tensor_shape()
-                               << " Epsilon: " << epsilon << " "
-                               << (fused_act.enabled() ? to_string(fused_act.activation()) : "")
-                               << " InPlace : " << is_in_place_operation(input, output)
+    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << node.type()
+                               << " Target " << GCTargetInfo::TargetType
+                               << " Data Type: " << output->info()->data_type()
+                               << " Shape: " << output->info()->tensor_shape()
+                               << " Num Inputs: " << inputs.size()
                                << std::endl);
 
     return std::move(func);
 }
 
-/** Create a backend convolution layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend convolution layer function
- */
-std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node, GraphContext &ctx)
+template <>
+std::unique_ptr<IFunction> create_convolution_layer<GCConvolutionLayerFunctions, GCTargetInfo>(ConvolutionLayerNode &node, GraphContext &ctx)
 {
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating GC ConvolutionLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 3);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
+    validate_node<GCTargetInfo>(node, 3 /* expected inputs */, 1 /* expected outputs */);
 
     // Extract IO and info
-    IGCTensor *input   = get_backing_tensor(node.input(0));
-    IGCTensor *weights = get_backing_tensor(node.input(1));
-    IGCTensor *biases  = get_backing_tensor(node.input(2));
-    IGCTensor *output  = get_backing_tensor(node.output(0));
+    GCTargetInfo::TensorType *input   = get_backing_tensor<GCTargetInfo>(node.input(0));
+    GCTargetInfo::TensorType *weights = get_backing_tensor<GCTargetInfo>(node.input(1));
+    GCTargetInfo::TensorType *biases  = get_backing_tensor<GCTargetInfo>(node.input(2));
+    GCTargetInfo::TensorType *output  = get_backing_tensor<GCTargetInfo>(node.output(0));
 
     if(is_data_type_quantized_asymmetric(input->info()->data_type()))
     {
@@ -167,19 +124,21 @@
     const ConvolutionMethod conv_algorithm = node.convolution_method();
 
     // Create and configure function (we assume that functions have been validated before creation)
-    std::shared_ptr<IMemoryManager> mm = get_memory_manager(ctx, Target::GC);
+    std::shared_ptr<IMemoryManager> mm = get_memory_manager(ctx, GCTargetInfo::TargetType);
     std::unique_ptr<IFunction>      func;
     std::string                     func_name;
 
-    if(conv_algorithm == ConvolutionMethod::DIRECT)
+    if(conv_algorithm == ConvolutionMethod::Direct)
     {
-        std::tie(func, func_name) = create_named_function<GCDirectConvolutionLayer>(
-                                        std::string("GCDirectConvolutionLayer"), input, weights, biases, output, conv_info);
+        std::tie(func, func_name) = create_named_function<GCConvolutionLayerFunctions::DirectConvolutionLayer>(
+                                        std::string("DirectConvolutionLayer"),
+                                        input, weights, biases, output, conv_info);
     }
     else
     {
-        std::tie(func, func_name) = create_named_memory_managed_function<GCConvolutionLayer>(std::string("GCConvolutionLayer"), mm,
-                                                                                             input, weights, biases, output, conv_info);
+        std::tie(func, func_name) = create_named_memory_managed_function<GCConvolutionLayerFunctions::GenericConvolutionLayer>(
+                                        std::string("ConvolutionLayer"), mm,
+                                        input, weights, biases, output, conv_info);
     }
 
     // Log info
@@ -194,64 +153,16 @@
     return func;
 }
 
-/** Create a backend layer depth concatenate function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend depth concatenate layer function
- */
-std::unique_ptr<arm_compute::IFunction> create_depth_concatenate_layer(DepthConcatenateLayerNode &node)
+template <>
+std::unique_ptr<IFunction> create_depthwise_convolution_layer<GCDepthwiseConvolutionLayerFunctions, GCTargetInfo>(DepthwiseConvolutionLayerNode &node)
 {
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating GC DepthConcatenate node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Return nullptr if depth concatenate is switched off
-    if(!node.is_enabled())
-    {
-        return nullptr;
-    }
+    validate_node<GCTargetInfo>(node, 3 /* expected inputs */, 1 /* expected outputs */);
 
     // Extract IO and info
-    std::vector<arm_compute::IGCTensor *> inputs;
-    for(unsigned int i = 0; i < node.num_inputs(); ++i)
-    {
-        inputs.push_back(get_backing_tensor(node.input(i)));
-    }
-    IGCTensor *output = get_backing_tensor(node.output(0));
-
-    // Create and configure function
-    auto func = support::cpp14::make_unique<GCDepthConcatenateLayer>();
-    func->configure(inputs, output);
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated GCDepthConcatenateLayer"
-                               << " Data Type: " << output->info()->data_type()
-                               << " Shape: " << output->info()->tensor_shape()
-                               << " Num Inputs: " << inputs.size()
-                               << std::endl);
-
-    return std::move(func);
-}
-
-/** Create a backend layer depth-wise convolution function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend depth-wise convolution layer function
- */
-std::unique_ptr<IFunction> create_depthwise_convolution_layer(DepthwiseConvolutionLayerNode &node)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE(
-        "Creating GC DepthwiseConvolutionLayer node with ID : " << node.id() << " and Name: " << node.name()
-        << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 3);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    IGCTensor *input   = get_backing_tensor(node.input(0));
-    IGCTensor *weights = get_backing_tensor(node.input(1));
-    IGCTensor *biases  = get_backing_tensor(node.input(2));
-    IGCTensor *output  = get_backing_tensor(node.output(0));
+    GCTargetInfo::TensorType *input   = get_backing_tensor<GCTargetInfo>(node.input(0));
+    GCTargetInfo::TensorType *weights = get_backing_tensor<GCTargetInfo>(node.input(1));
+    GCTargetInfo::TensorType *biases  = get_backing_tensor<GCTargetInfo>(node.input(2));
+    GCTargetInfo::TensorType *output  = get_backing_tensor<GCTargetInfo>(node.output(0));
 
     if(is_data_type_quantized_asymmetric(input->info()->data_type()))
     {
@@ -264,10 +175,11 @@
     // Create and configure function (we assume that functions have been validated before creation)
     std::unique_ptr<IFunction> func;
     std::string                func_name;
-    if(dwc_algorithm == DepthwiseConvolutionMethod::OPTIMIZED_3x3)
+    if(dwc_algorithm == DepthwiseConvolutionMethod::Optimized3x3)
     {
-        std::tie(func, func_name) = create_named_function<GCDepthwiseConvolutionLayer3x3>(
-                                        std::string("GCDepthwiseConvolutionLayer3x3"), input, weights, biases, output, conv_info);
+        std::tie(func, func_name) = create_named_function<GCDepthwiseConvolutionLayerFunctions::DepthwiseConvolutionLayer3x3>(
+                                        std::string("DepthwiseConvolutionLayer3x3"),
+                                        input, weights, biases, output, conv_info);
     }
     else
     {
@@ -276,6 +188,7 @@
 
     // Log info
     ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
+                               << " Target " << GCTargetInfo::TargetType
                                << " Data Type: " << input->info()->data_type()
                                << " Input QuantInfo: " << input->info()->quantization_info()
                                << " Weights QuantInfo: " << weights->info()->quantization_info()
@@ -286,13 +199,8 @@
     return func;
 }
 
-/** Create a backend element-wise operation layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend element-wise operation layer function
- */
-std::unique_ptr<IFunction> create_eltwise_layer(EltwiseLayerNode &node)
+template <>
+std::unique_ptr<IFunction> create_eltwise_layer<GCEltwiseFunctions, GCTargetInfo>(EltwiseLayerNode &node)
 {
     ARM_COMPUTE_LOG_GRAPH_VERBOSE(
         "Creating GC EltwiseLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
@@ -300,31 +208,32 @@
     ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
 
     // Extract IO and info
-    IGCTensor             *input1         = get_backing_tensor(node.input(0));
-    IGCTensor             *input2         = get_backing_tensor(node.input(1));
-    IGCTensor             *output         = get_backing_tensor(node.output(0));
-    const EltwiseOperation eltwise_op     = node.eltwise_operation();
-    const ConvertPolicy    convert_policy = node.convert_policy();
+    GCTargetInfo::TensorType *input1         = get_backing_tensor<GCTargetInfo>(node.input(0));
+    GCTargetInfo::TensorType *input2         = get_backing_tensor<GCTargetInfo>(node.input(1));
+    GCTargetInfo::TensorType *output         = get_backing_tensor<GCTargetInfo>(node.output(0));
+    const EltwiseOperation    eltwise_op     = node.eltwise_operation();
+    const ConvertPolicy       convert_policy = node.convert_policy();
     ARM_COMPUTE_ERROR_ON(input1 == nullptr);
     ARM_COMPUTE_ERROR_ON(input2 == nullptr);
     ARM_COMPUTE_ERROR_ON(output == nullptr);
 
     std::unique_ptr<IFunction> func = nullptr;
     std::string                func_name;
-    if(eltwise_op == EltwiseOperation::ADD)
+    if(eltwise_op == EltwiseOperation::Add)
     {
-        std::tie(func, func_name) = create_named_function<GCArithmeticAddition>(std::string("GCArithmeticAddition"),
-                                                                                input1, input2, output,
-                                                                                convert_policy);
+        std::tie(func, func_name) = create_named_function<GCEltwiseFunctions::Addition>(
+                                        std::string("GCArithmeticAddition"),
+                                        input1, input2, output, convert_policy);
     }
-    else if(eltwise_op == EltwiseOperation::SUB)
+    else if(eltwise_op == EltwiseOperation::Sub)
     {
         ARM_COMPUTE_ERROR("Arithmetic subtraction is not supported in GLES backend");
     }
-    else if(eltwise_op == EltwiseOperation::MUL)
+    else if(eltwise_op == EltwiseOperation::Mul)
     {
-        std::tie(func, func_name) = create_named_function<GCPixelWiseMultiplication>(
-                                        std::string("GCPixelWiseMultiplication"), input1, input2, output, 1.f);
+        std::tie(func, func_name) = create_named_function<GCEltwiseFunctions::Multiplication>(
+                                        std::string("PixelWiseMultiplication"),
+                                        input1, input2, output, 1.f);
     }
     else
     {
@@ -332,157 +241,16 @@
     }
 
     // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
+    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << node.type()
+                               << " Target " << GCTargetInfo::TargetType
+                               << " Operation " << func_name
                                << " Data Type: " << input1->info()->data_type()
                                << " Shape : " << input1->info()->tensor_shape()
                                << std::endl);
 
     return func;
 }
-
-/** Create a backend fully connected layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend fully connected layer function
- */
-std::unique_ptr<IFunction> create_fully_connected_layer(FullyConnectedLayerNode &node, GraphContext &ctx)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE(
-        "Creating GC FullyConnectedLayer node with ID : " << node.id() << " and Name: " << node.name()
-        << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 3);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    IGCTensor *input   = get_backing_tensor(node.input(0));
-    IGCTensor *weights = get_backing_tensor(node.input(1));
-    IGCTensor *biases  = get_backing_tensor(node.input(2));
-    IGCTensor *output  = get_backing_tensor(node.output(0));
-
-    // Create and configure function
-    auto func = support::cpp14::make_unique<GCFullyConnectedLayer>(get_memory_manager(ctx, Target::GC));
-    func->configure(input, weights, biases, output);
-    ARM_COMPUTE_ERROR_ON(input == nullptr);
-    ARM_COMPUTE_ERROR_ON(weights == nullptr);
-    ARM_COMPUTE_ERROR_ON(output == nullptr);
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated GCFullyConnectedLayer"
-                               << " Data Type: " << input->info()->data_type()
-                               << " Input shape: " << input->info()->tensor_shape()
-                               << " Weights shape: " << weights->info()->tensor_shape()
-                               << " Biases Shape: " << biases->info()->tensor_shape()
-                               << " Output shape: " << output->info()->tensor_shape()
-                               << std::endl);
-
-    return std::move(func);
-}
-
-/** Create a backend normalization layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend normalization layer function
- */
-std::unique_ptr<IFunction> create_normalization_layer(NormalizationLayerNode &node)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE(
-        "Creating GC NormalizationLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    IGCTensor                   *input     = get_backing_tensor(node.input(0));
-    IGCTensor                   *output    = get_backing_tensor(node.output(0));
-    const NormalizationLayerInfo norm_info = node.normalization_info();
-    ARM_COMPUTE_ERROR_ON(input == nullptr);
-    ARM_COMPUTE_ERROR_ON(output == nullptr);
-
-    // Create and configure function
-    auto func = support::cpp14::make_unique<GCNormalizationLayer>();
-    func->configure(input, output, norm_info);
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated GCNormalizationLayer"
-                               << " Data Type: " << input->info()->data_type()
-                               << " Input shape: " << input->info()->tensor_shape()
-                               << " Output shape: " << output->info()->tensor_shape()
-                               << " Normalization info: " << norm_info.type()
-                               << std::endl);
-
-    return std::move(func);
-}
-
-/** Create a backend pooling layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend pooling layer function
- */
-std::unique_ptr<IFunction> create_pooling_layer(PoolingLayerNode &node)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE(
-        "Creating GC PoolingLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    IGCTensor             *input     = get_backing_tensor(node.input(0));
-    IGCTensor             *output    = get_backing_tensor(node.output(0));
-    const PoolingLayerInfo pool_info = node.pooling_info();
-    ARM_COMPUTE_ERROR_ON(input == nullptr);
-    ARM_COMPUTE_ERROR_ON(output == nullptr);
-
-    // Create and configure function
-    auto func = support::cpp14::make_unique<GCPoolingLayer>();
-    func->configure(input, output, pool_info);
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated GCPoolingLayer"
-                               << " Data Type: " << input->info()->data_type()
-                               << " Input shape: " << input->info()->tensor_shape()
-                               << " Output shape: " << output->info()->tensor_shape()
-                               << " Pooling info: " << pool_info.pool_type()
-                               << std::endl);
-
-    return std::move(func);
-}
-
-/** Create a backend softmax layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend softmax layer function
- */
-std::unique_ptr<IFunction> create_softmax_layer(SoftmaxLayerNode &node, GraphContext &ctx)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE(
-        "Creating GC SoftmaxLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    IGCTensor *input  = get_backing_tensor(node.input(0));
-    IGCTensor *output = get_backing_tensor(node.output(0));
-    const float beta   = node.beta();
-    ARM_COMPUTE_ERROR_ON(input == nullptr);
-    ARM_COMPUTE_ERROR_ON(output == nullptr);
-
-    // Create and configure function
-    auto func = support::cpp14::make_unique<GCSoftmaxLayer>(get_memory_manager(ctx, Target::CL));
-    func->configure(input, output, beta);
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated GCSoftmaxLayer"
-                               << " Data Type: " << input->info()->data_type()
-                               << " Input shape: " << input->info()->tensor_shape()
-                               << " Output shape: " << output->info()->tensor_shape()
-                               << std::endl);
-
-    return std::move(func);
-}
-} // namespace
+} // namespace detail
 
 std::unique_ptr<IFunction> GCFunctionFactory::create(INode *node, GraphContext &ctx)
 {
@@ -495,29 +263,31 @@
     switch(type)
     {
         case NodeType::ActivationLayer:
-            return create_activation_layer(*polymorphic_downcast<ActivationLayerNode *>(node));
+            return detail::create_activation_layer<GCActivationLayer, GCTargetInfo>(*polymorphic_downcast<ActivationLayerNode *>(node));
         case NodeType::BatchNormalizationLayer:
-            return create_batch_normalization_layer(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
+            return detail::create_batch_normalization_layer<GCBatchNormalizationLayer, GCTargetInfo>(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
         case NodeType::ConvolutionLayer:
-            return create_convolution_layer(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
-        case NodeType::DepthConcatenateLayer:
-            return create_depth_concatenate_layer(*polymorphic_downcast<DepthConcatenateLayerNode *>(node));
+            return detail::create_convolution_layer<GCConvolutionLayerFunctions, GCTargetInfo>(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
+        case NodeType::ConcatenateLayer:
+            return detail::create_concatenate_layer<GCDepthConcatenateLayer, GCTargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
         case NodeType::DepthwiseConvolutionLayer:
-            return create_depthwise_convolution_layer(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+            return detail::create_depthwise_convolution_layer<GCDepthwiseConvolutionLayerFunctions, GCTargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
         case NodeType::EltwiseLayer:
-            return create_eltwise_layer(*polymorphic_downcast<EltwiseLayerNode *>(node));
+            return detail::create_eltwise_layer<GCEltwiseFunctions, GCTargetInfo>(*polymorphic_downcast<EltwiseLayerNode *>(node));
         case NodeType::FullyConnectedLayer:
-            return create_fully_connected_layer(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
+            return detail::create_fully_connected_layer<GCFullyConnectedLayer, GCTargetInfo>(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
         case NodeType::NormalizationLayer:
-            return create_normalization_layer(*polymorphic_downcast<NormalizationLayerNode *>(node));
+            return detail::create_normalization_layer<GCNormalizationLayer, GCTargetInfo>(*polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
         case NodeType::PoolingLayer:
-            return create_pooling_layer(*polymorphic_downcast<PoolingLayerNode *>(node));
+            return detail::create_pooling_layer<GCPoolingLayer, GCTargetInfo>(*polymorphic_downcast<PoolingLayerNode *>(node));
+        case NodeType::ResizeLayer:
+            return detail::create_resize_layer<GCScale, GCTargetInfo>(*polymorphic_downcast<ResizeLayerNode *>(node));
         case NodeType::SoftmaxLayer:
-            return create_softmax_layer(*polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
+            return detail::create_softmax_layer<GCSoftmaxLayer, GCTargetInfo>(*polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
         default:
             return nullptr;
     }
 }
 } // namespace backends
 } // namespace graph
-} // namespace arm_compute
+} // namespace arm_compute
\ No newline at end of file
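
Element-wise support is described per backend by a small trait struct (GCEltwiseFunctions above, with CL and NEON equivalents elsewhere in this patch), and the shared template selects the member alias matching the node's operation. A sketch of that dispatch, assuming FunctionHelpers.h follows the pattern of the GC specialization above; GLES defines no Subtraction alias, which is why it keeps a hand-written specialization that errors on EltwiseOperation::Sub rather than instantiating this template:

template <typename EltwiseFunctions, typename TargetInfo>
std::unique_ptr<IFunction> create_eltwise_layer(EltwiseLayerNode &node)
{
    validate_node<TargetInfo>(node, 2 /* expected inputs */, 1 /* expected outputs */);

    // Extract IO and info
    typename TargetInfo::TensorType *input1         = get_backing_tensor<TargetInfo>(node.input(0));
    typename TargetInfo::TensorType *input2         = get_backing_tensor<TargetInfo>(node.input(1));
    typename TargetInfo::TensorType *output         = get_backing_tensor<TargetInfo>(node.output(0));
    const EltwiseOperation           eltwise_op     = node.eltwise_operation();
    const ConvertPolicy              convert_policy = node.convert_policy();

    std::unique_ptr<IFunction> func = nullptr;
    std::string                func_name;
    if(eltwise_op == EltwiseOperation::Add)
    {
        std::tie(func, func_name) = create_named_function<typename EltwiseFunctions::Addition>(
                                        std::string("ArithmeticAddition"), input1, input2, output, convert_policy);
    }
    else if(eltwise_op == EltwiseOperation::Sub)
    {
        std::tie(func, func_name) = create_named_function<typename EltwiseFunctions::Subtraction>(
                                        std::string("ArithmeticSubtraction"), input1, input2, output, convert_policy);
    }
    else if(eltwise_op == EltwiseOperation::Mul)
    {
        std::tie(func, func_name) = create_named_function<typename EltwiseFunctions::Multiplication>(
                                        std::string("PixelWiseMultiplication"), input1, input2, output, 1.f);
    }
    else
    {
        ARM_COMPUTE_ERROR("Unsupported element-wise operation!");
    }
    return func;
}
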
diff --git a/src/graph/backends/GLES/GCNodeValidator.cpp b/src/graph/backends/GLES/GCNodeValidator.cpp
index c7f7d81..53049c7 100644
--- a/src/graph/backends/GLES/GCNodeValidator.cpp
+++ b/src/graph/backends/GLES/GCNodeValidator.cpp
@@ -57,7 +57,7 @@
 
     // Validate function
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->tensor_shape().x() != 3 && weights->tensor_shape().y() != 3, "Unsupported depthwise convolution");
-    node.set_depthwise_convolution_method(DepthwiseConvolutionMethod::OPTIMIZED_3x3);
+    node.set_depthwise_convolution_method(DepthwiseConvolutionMethod::Optimized3x3);
 
     return Status{};
 }
@@ -79,15 +79,13 @@
     const ConvolutionMethod   conv_algorithm = node.convolution_method();
 
     // Validate function
-    if(conv_algorithm == ConvolutionMethod::DIRECT)
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(node.num_groups() != 1, "Grouping is not supported by ConvolutionLayer!");
+    if(conv_algorithm == ConvolutionMethod::Direct)
     {
         bool is_square         = weights->tensor_shape().x() == weights->tensor_shape().y();
         bool is_direct         = (weights->tensor_shape().x() == 1) || (weights->tensor_shape().x() == 3) || (weights->tensor_shape().x() == 5);
         bool is_correct_stride = (conv_info.stride().first) <= 2 && (conv_info.stride().second <= 2);
-        if(!(is_square && is_direct && is_correct_stride))
-        {
-            node.set_convolution_method(ConvolutionMethod::DEFAULT);
-        }
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(is_square && is_direct && is_correct_stride), "Direct convolution is not supported for given configuration");
     }
 
     return Status{};
@@ -104,14 +102,18 @@
     NodeType type = node->type();
     switch(type)
     {
+        case NodeType::ChannelShuffleLayer:
+            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : ChannelShuffleLayer");
         case NodeType::ConvolutionLayer:
             return validate_convolution_layer(*polymorphic_downcast<ConvolutionLayerNode *>(node));
         case NodeType::DepthwiseConvolutionLayer:
             return validate_depthwise_convolution_layer(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
         case NodeType::FlattenLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation");
+            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : FlattenLayer");
+        case NodeType::PermuteLayer:
+            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : PermuteLayer");
         case NodeType::ReshapeLayer:
-            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation");
+            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : ReshapeLayer");
         default:
             return Status{};
     }
diff --git a/src/graph/backends/NEON/NEDeviceBackend.cpp b/src/graph/backends/NEON/NEDeviceBackend.cpp
index 7c2db40..5fc44d0 100644
--- a/src/graph/backends/NEON/NEDeviceBackend.cpp
+++ b/src/graph/backends/NEON/NEDeviceBackend.cpp
@@ -61,6 +61,13 @@
 
 void NEDeviceBackend::initialize_backend()
 {
+    // Nothing to do
+}
+
+void NEDeviceBackend::release_backend_context(GraphContext &ctx)
+{
+    // Nothing to do
+    ARM_COMPUTE_UNUSED(ctx);
 }
 
 void NEDeviceBackend::setup_backend_context(GraphContext &ctx)
@@ -155,4 +162,4 @@
 }
 } // namespace backends
 } // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/backends/NEON/NEFunctionFactory.cpp b/src/graph/backends/NEON/NEFunctionFactory.cpp
index 7b1c50f..36a25ad 100644
--- a/src/graph/backends/NEON/NEFunctionFactory.cpp
+++ b/src/graph/backends/NEON/NEFunctionFactory.cpp
@@ -28,6 +28,7 @@
 #include "arm_compute/graph/GraphContext.h"
 #include "arm_compute/graph/Logger.h"
 #include "arm_compute/graph/TypePrinter.h"
+#include "arm_compute/graph/backends/FunctionHelpers.h"
 #include "arm_compute/graph/backends/Utils.h"
 #include "arm_compute/graph/nodes/Nodes.h"
 #include "arm_compute/runtime/NEON/NEFunctions.h"
@@ -41,108 +42,53 @@
 {
 namespace backends
 {
-namespace
+/** Target-specific information structure used to pass information to the layer templates */
+struct NETargetInfo
 {
-/** Returns backing tensor of a given tensor
- *
- * @param[in] tensor Tensor to extract the backing tensor from
- *
- * @return Backing tensor if present else nullptr
- */
-arm_compute::ITensor *get_backing_tensor(arm_compute::graph::Tensor *tensor)
-{
-    return ((tensor == nullptr) || (tensor->handle() == nullptr)) ? nullptr : &tensor->handle()->tensor();
-}
+    using TensorType = arm_compute::ITensor;
+    static Target TargetType;
+};
 
-/** Create a backend activation layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend activation layer function
- */
-std::unique_ptr<IFunction> create_activation_layer(ActivationLayerNode &node)
+Target NETargetInfo::TargetType = Target::NEON;
+
+/** Collection of NEON convolution functions */
+struct NEConvolutionLayerFunctions
 {
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON ActivationLayerNode node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
+    using GenericConvolutionLayer  = NEConvolutionLayer;
+    using GEMMConvolutionLayer     = NEGEMMConvolutionLayer;
+    using DirectConvolutionLayer   = NEDirectConvolutionLayer;
+    using WinogradConvolutionLayer = NEWinogradConvolutionLayer;
+};
+
+/** Collection of NEON depthwise convolution functions */
+struct NEDepthwiseConvolutionLayerFunctions
+{
+    using GenericDepthwiseConvolutionLayer = NEDepthwiseConvolutionLayer;
+    using DepthwiseConvolutionLayer3x3     = NEDepthwiseConvolutionLayer3x3;
+};
+
+/** Collection of NEON element-wise functions */
+struct NEEltwiseFunctions
+{
+    using Addition       = NEArithmeticAddition;
+    using Subtraction    = NEArithmeticSubtraction;
+    using Multiplication = NEPixelWiseMultiplication;
+};
+
+namespace detail
+{
+// Specialized functions
+template <>
+std::unique_ptr<IFunction> create_convolution_layer<NEConvolutionLayerFunctions, NETargetInfo>(ConvolutionLayerNode &node,
+                                                                                               GraphContext &ctx)
+{
+    validate_node<NETargetInfo>(node, 3 /* expected inputs */, 1 /* expected outputs */);
 
     // Extract IO and info
-    ITensor                  *input    = get_backing_tensor(node.input(0));
-    ITensor                  *output   = get_backing_tensor(node.output(0));
-    const ActivationLayerInfo act_info = node.activation_info();
-
-    // Create function
-    auto func = support::cpp14::make_unique<NEActivationLayer>();
-    func->configure(input, output, act_info);
-
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated NEActivationLayer"
-                               << " Data Type: " << input->info()->data_type()
-                               << " Shape: " << input->info()->tensor_shape()
-                               << " Activation function: " << act_info.activation()
-                               << " a: " << act_info.a()
-                               << " b: " << act_info.b()
-                               << " InPlace : " << is_in_place_operation(input, output)
-                               << std::endl);
-
-    return std::move(func);
-}
-
-/** Create a backend batch normalization layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend batch normalization layer function
- */
-std::unique_ptr<IFunction> create_batch_normalization_layer(BatchNormalizationLayerNode &node)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON BatchNormalization node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 5);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    ITensor                  *input     = get_backing_tensor(node.input(0));
-    ITensor                  *mean      = get_backing_tensor(node.input(1));
-    ITensor                  *var       = get_backing_tensor(node.input(2));
-    ITensor                  *beta      = get_backing_tensor(node.input(3));
-    ITensor                  *gamma     = get_backing_tensor(node.input(4));
-    ITensor                  *output    = get_backing_tensor(node.output(0));
-    const float               epsilon   = node.epsilon();
-    const ActivationLayerInfo fused_act = node.fused_activation();
-
-    // Create and configure function
-    auto func = support::cpp14::make_unique<NEBatchNormalizationLayer>();
-    func->configure(input, output, mean, var, beta, gamma, epsilon, fused_act);
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated NEBatchNormalizationLayer"
-                               << " Data Type: " << input->info()->data_type()
-                               << " Shape: " << input->info()->tensor_shape()
-                               << " Epsilon: " << epsilon << " "
-                               << (fused_act.enabled() ? to_string(fused_act.activation()) : "")
-                               << " InPlace : " << is_in_place_operation(input, output)
-                               << std::endl);
-
-    return std::move(func);
-}
-
-/** Create a backend convolution layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend convolution layer function
- */
-std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node, GraphContext &ctx)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON ConvolutionLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 3);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    ITensor *input   = get_backing_tensor(node.input(0));
-    ITensor *weights = get_backing_tensor(node.input(1));
-    ITensor *biases  = get_backing_tensor(node.input(2));
-    ITensor *output  = get_backing_tensor(node.output(0));
+    NETargetInfo::TensorType *input   = get_backing_tensor<NETargetInfo>(node.input(0));
+    NETargetInfo::TensorType *weights = get_backing_tensor<NETargetInfo>(node.input(1));
+    NETargetInfo::TensorType *biases  = get_backing_tensor<NETargetInfo>(node.input(2));
+    NETargetInfo::TensorType *output  = get_backing_tensor<NETargetInfo>(node.output(0));
 
     if(is_data_type_quantized_asymmetric(input->info()->data_type()))
     {
@@ -156,29 +102,30 @@
     std::shared_ptr<IMemoryManager> mm = get_memory_manager(ctx, Target::NEON);
     std::unique_ptr<IFunction>      func;
     std::string                     func_name;
-    if(conv_algorithm == ConvolutionMethod::DIRECT)
+    if(conv_algorithm == ConvolutionMethod::Direct)
     {
-        std::tie(func, func_name) = create_named_memory_managed_function<NEDirectConvolutionLayer>(std::string("NEDirectConvolutionLayer"), mm,
-                                                                                                   input, weights, biases, output, conv_info);
+        std::tie(func, func_name) = create_named_memory_managed_function<NEDirectConvolutionLayer>(
+                                        std::string("DirectConvolutionLayer"), mm, input, weights, biases, output, conv_info);
     }
     else if(conv_algorithm == ConvolutionMethod::GEMM)
     {
-        std::tie(func, func_name) = create_named_memory_managed_function<NEGEMMConvolutionLayer>(std::string("NEGEMMConvolutionLayer"), mm,
-                                                                                                 input, weights, biases, output, conv_info);
+        std::tie(func, func_name) = create_named_memory_managed_function<NEGEMMConvolutionLayer>(
+                                        std::string("GEMMConvolutionLayer"), mm, input, weights, biases, output, conv_info);
     }
-    else if(conv_algorithm == ConvolutionMethod::WINOGRAD)
+    else if(conv_algorithm == ConvolutionMethod::Winograd)
     {
-        std::tie(func, func_name) = create_named_memory_managed_function<NEWinogradConvolutionLayer>(std::string("NEWinogradConvolutionLayer"), mm,
-                                                                                                     input, weights, biases, output, conv_info);
+        std::tie(func, func_name) = create_named_memory_managed_function<NEWinogradConvolutionLayer>(
+                                        std::string("WinogradConvolutionLayer"), mm, input, weights, biases, output, conv_info);
     }
     else
     {
-        std::tie(func, func_name) = create_named_memory_managed_function<NEConvolutionLayer>(std::string("NEConvolutionLayer"), mm,
-                                                                                             input, weights, biases, output, conv_info);
+        std::tie(func, func_name) = create_named_memory_managed_function<NEConvolutionLayer>(
+                                        std::string("ConvolutionLayer"), mm, input, weights, biases, output, conv_info);
     }
 
     // Log info
     ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
+                               << " Target " << NETargetInfo::TargetType
                                << " Data Type: " << input->info()->data_type()
                                << " Input QuantInfo: " << input->info()->quantization_info()
                                << " Weights QuantInfo: " << weights->info()->quantization_info()
@@ -189,244 +136,25 @@
     return func;
 }
 
-/** Create a backend layer depth concatenate function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend depth concatenate layer function
- */
-std::unique_ptr<arm_compute::IFunction> create_depth_concatenate_layer(DepthConcatenateLayerNode &node)
+template <>
+std::unique_ptr<IFunction> create_normalization_layer<NENormalizationLayer, NETargetInfo>(NormalizationLayerNode &node, GraphContext &ctx)
 {
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON DepthConcatenate node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Return nullptr if depth concatenate is switched off
-    if(!node.is_enabled())
-    {
-        return nullptr;
-    }
+    validate_node<NETargetInfo>(node, 1 /* expected inputs */, 1 /* expected outputs */);
 
     // Extract IO and info
-    std::vector<arm_compute::ITensor *> inputs;
-    for(unsigned int i = 0; i < node.num_inputs(); ++i)
-    {
-        inputs.push_back(get_backing_tensor(node.input(i)));
-    }
-    ITensor *output = get_backing_tensor(node.output(0));
-
-    // Create and configure function
-    auto func = support::cpp14::make_unique<NEDepthConcatenateLayer>();
-    func->configure(inputs, output);
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated NEDepthConcatenateLayer"
-                               << " Data Type: " << output->info()->data_type()
-                               << " Shape: " << output->info()->tensor_shape()
-                               << " Num Inputs: " << inputs.size()
-                               << std::endl);
-
-    return std::move(func);
-}
-
-/** Create a backend layer depth-wise convolution function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend depth-wise convolution layer function
- */
-std::unique_ptr<IFunction> create_depthwise_convolution_layer(DepthwiseConvolutionLayerNode &node)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON DepthwiseConvolutionLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 3);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    ITensor *input   = get_backing_tensor(node.input(0));
-    ITensor *weights = get_backing_tensor(node.input(1));
-    ITensor *biases  = get_backing_tensor(node.input(2));
-    ITensor *output  = get_backing_tensor(node.output(0));
-
-    if(is_data_type_quantized_asymmetric(input->info()->data_type()))
-    {
-        biases->info()->set_data_type(DataType::S32);
-    }
-
-    const PadStrideInfo              conv_info     = node.convolution_info();
-    const DepthwiseConvolutionMethod dwc_algorithm = node.depthwise_convolution_method();
-
-    // Create and configure function (we assume that functions have been validated before creation)
-    std::unique_ptr<IFunction> func;
-    std::string                func_name;
-    if(dwc_algorithm == DepthwiseConvolutionMethod::OPTIMIZED_3x3)
-    {
-        std::tie(func, func_name) = create_named_function<NEDepthwiseConvolutionLayer3x3>(std::string("NEDepthwiseConvolutionLayer3x3"),
-                                                                                          input, weights, biases, output, conv_info);
-    }
-    else
-    {
-        std::tie(func, func_name) = create_named_function<NEDepthwiseConvolutionLayer>(std::string("NEDepthwiseConvolutionLayer"),
-                                                                                       input, weights, biases, output, conv_info);
-    }
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
-                               << " Data Type: " << input->info()->data_type()
-                               << " Input QuantInfo: " << input->info()->quantization_info()
-                               << " Weights QuantInfo: " << weights->info()->quantization_info()
-                               << " Input shape: " << input->info()->tensor_shape()
-                               << " Weights shape: " << weights->info()->tensor_shape()
-                               << " Output shape: " << output->info()->tensor_shape()
-                               << std::endl);
-    return func;
-}
-
-/** Create a backend element-wise operation layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend element-wise operation layer function
- */
-std::unique_ptr<IFunction> create_eltwise_layer(EltwiseLayerNode &node)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON EltwiseLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 2);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    ITensor               *input1         = get_backing_tensor(node.input(0));
-    ITensor               *input2         = get_backing_tensor(node.input(1));
-    ITensor               *output         = get_backing_tensor(node.output(0));
-    const EltwiseOperation eltwise_op     = node.eltwise_operation();
-    const ConvertPolicy    convert_policy = node.convert_policy();
-    ARM_COMPUTE_ERROR_ON(input1 == nullptr);
-    ARM_COMPUTE_ERROR_ON(input2 == nullptr);
-    ARM_COMPUTE_ERROR_ON(output == nullptr);
-
-    std::unique_ptr<IFunction> func = nullptr;
-    std::string                func_name;
-    if(eltwise_op == EltwiseOperation::ADD)
-    {
-        std::tie(func, func_name) = create_named_function<NEArithmeticAddition>(std::string("NEArithmeticAddition"),
-                                                                                input1, input2, output, convert_policy);
-    }
-    else if(eltwise_op == EltwiseOperation::SUB)
-    {
-        std::tie(func, func_name) = create_named_function<NEArithmeticSubtraction>(std::string("NEArithmeticSubtraction"),
-                                                                                   input1, input2, output, convert_policy);
-    }
-    else if(eltwise_op == EltwiseOperation::MUL)
-    {
-        std::tie(func, func_name) = create_named_function<NEPixelWiseMultiplication>(std::string("NEPixelWiseMultiplication"),
-                                                                                     input1, input2, output, 1.f,
-                                                                                     convert_policy, node.rounding_policy());
-    }
-    else
-    {
-        ARM_COMPUTE_ERROR("Unsupported element-wise operation!");
-    }
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
-                               << " Data Type: " << input1->info()->data_type()
-                               << " Shape : " << input1->info()->tensor_shape()
-                               << std::endl);
-
-    return func;
-}
-
-/** Create a backend flatten layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend flatten layer function
- */
-std::unique_ptr<IFunction> create_flatten_layer(FlattenLayerNode &node)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON FlattenLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    ITensor *input  = get_backing_tensor(node.input(0));
-    ITensor *output = get_backing_tensor(node.output(0));
-
-    // Create and configure function
-    auto func = support::cpp14::make_unique<NEFlattenLayer>();
-    func->configure(input, output);
-    ARM_COMPUTE_ERROR_ON(input == nullptr);
-    ARM_COMPUTE_ERROR_ON(output == nullptr);
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated NEFlattenLayer"
-                               << " Data Type: " << input->info()->data_type()
-                               << " Input shape: " << input->info()->tensor_shape()
-                               << " Output shape: " << output->info()->tensor_shape()
-                               << std::endl);
-
-    return std::move(func);
-}
-
-/** Create a backend fully connected layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend fully connected layer function
- */
-std::unique_ptr<IFunction> create_fully_connected_layer(FullyConnectedLayerNode &node, GraphContext &ctx)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON FullyConnectedLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 3);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    ITensor *input   = get_backing_tensor(node.input(0));
-    ITensor *weights = get_backing_tensor(node.input(1));
-    ITensor *biases  = get_backing_tensor(node.input(2));
-    ITensor *output  = get_backing_tensor(node.output(0));
-
-    // Create and configure function
-    auto func = support::cpp14::make_unique<NEFullyConnectedLayer>(get_memory_manager(ctx, Target::NEON));
-    func->configure(input, weights, biases, output);
-    ARM_COMPUTE_ERROR_ON(input == nullptr);
-    ARM_COMPUTE_ERROR_ON(weights == nullptr);
-    ARM_COMPUTE_ERROR_ON(output == nullptr);
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated NEFullyConnectedLayer"
-                               << " Data Type: " << input->info()->data_type()
-                               << " Input shape: " << input->info()->tensor_shape()
-                               << " Weights shape: " << weights->info()->tensor_shape()
-                               << " Output shape: " << output->info()->tensor_shape()
-                               << std::endl);
-
-    return std::move(func);
-}
-
-/** Create a backend normalization layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend normalization layer function
- */
-std::unique_ptr<IFunction> create_normalization_layer(NormalizationLayerNode &node, GraphContext &ctx)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON NormalizationLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    ITensor                     *input     = get_backing_tensor(node.input(0));
-    ITensor                     *output    = get_backing_tensor(node.output(0));
+    NETargetInfo::TensorType    *input     = get_backing_tensor<NETargetInfo>(node.input(0));
+    NETargetInfo::TensorType    *output    = get_backing_tensor<NETargetInfo>(node.output(0));
     const NormalizationLayerInfo norm_info = node.normalization_info();
     ARM_COMPUTE_ERROR_ON(input == nullptr);
     ARM_COMPUTE_ERROR_ON(output == nullptr);
 
     // Create and configure function
-    auto func = support::cpp14::make_unique<NENormalizationLayer>(get_memory_manager(ctx, Target::NEON));
+    auto func = support::cpp14::make_unique<NENormalizationLayer>(get_memory_manager(ctx, NETargetInfo::TargetType));
     func->configure(input, output, norm_info);
 
     // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated NENormalizationLayer"
+    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << node.type()
+                               << " Target " << NETargetInfo::TargetType
                                << " Data Type: " << input->info()->data_type()
                                << " Input shape: " << input->info()->tensor_shape()
                                << " Output shape: " << output->info()->tensor_shape()
@@ -435,106 +163,7 @@
 
     return std::move(func);
 }
-
-/** Create a backend pooling layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend pooling layer function
- */
-std::unique_ptr<IFunction> create_pooling_layer(PoolingLayerNode &node)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON PoolingLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    ITensor               *input     = get_backing_tensor(node.input(0));
-    ITensor               *output    = get_backing_tensor(node.output(0));
-    const PoolingLayerInfo pool_info = node.pooling_info();
-    ARM_COMPUTE_ERROR_ON(input == nullptr);
-    ARM_COMPUTE_ERROR_ON(output == nullptr);
-
-    // Create and configure function
-    auto func = support::cpp14::make_unique<NEPoolingLayer>();
-    func->configure(input, output, pool_info);
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated NEPoolingLayer"
-                               << " Data Type: " << input->info()->data_type()
-                               << " Input shape: " << input->info()->tensor_shape()
-                               << " Output shape: " << output->info()->tensor_shape()
-                               << " Pooling info: " << pool_info.pool_type()
-                               << std::endl);
-
-    return std::move(func);
-}
-
-/** Create a backend reshape layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend reshape layer function
- */
-std::unique_ptr<IFunction> create_reshape_layer(ReshapeLayerNode &node)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON ReshapeLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    ITensor *input  = get_backing_tensor(node.input(0));
-    ITensor *output = get_backing_tensor(node.output(0));
-    ARM_COMPUTE_ERROR_ON(input == nullptr);
-    ARM_COMPUTE_ERROR_ON(output == nullptr);
-
-    // Create and configure function
-    auto func = support::cpp14::make_unique<NEReshapeLayer>();
-    func->configure(input, output);
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated NEReshapeLayer"
-                               << " Data Type: " << input->info()->data_type()
-                               << " Input shape: " << input->info()->tensor_shape()
-                               << " Output shape: " << output->info()->tensor_shape()
-                               << std::endl);
-
-    return std::move(func);
-}
-
-/** Create a backend softmax layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend softmax layer function
- */
-std::unique_ptr<IFunction> create_softmax_layer(SoftmaxLayerNode &node, GraphContext &ctx)
-{
-    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON SoftmaxLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-    ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
-    ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
-    // Extract IO and info
-    ITensor    *input  = get_backing_tensor(node.input(0));
-    ITensor    *output = get_backing_tensor(node.output(0));
-    const float beta   = node.beta();
-    ARM_COMPUTE_ERROR_ON(input == nullptr);
-    ARM_COMPUTE_ERROR_ON(output == nullptr);
-
-    // Create and configure function
-    auto func = support::cpp14::make_unique<NESoftmaxLayer>(get_memory_manager(ctx, Target::NEON));
-    func->configure(input, output, beta);
-
-    // Log info
-    ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated NESoftmaxLayer"
-                               << " Data Type: " << input->info()->data_type()
-                               << " Input shape: " << input->info()->tensor_shape()
-                               << " Output shape: " << output->info()->tensor_shape()
-                               << std::endl);
-
-    return std::move(func);
-}
-} // namespace
+} // namespace detail
 
 std::unique_ptr<IFunction> NEFunctionFactory::create(INode *node, GraphContext &ctx)
 {
@@ -547,33 +176,39 @@
     switch(type)
     {
         case NodeType::ActivationLayer:
-            return create_activation_layer(*polymorphic_downcast<ActivationLayerNode *>(node));
+            return detail::create_activation_layer<NEActivationLayer, NETargetInfo>(*polymorphic_downcast<ActivationLayerNode *>(node));
         case NodeType::BatchNormalizationLayer:
-            return create_batch_normalization_layer(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
+            return detail::create_batch_normalization_layer<NEBatchNormalizationLayer, NETargetInfo>(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
         case NodeType::ConvolutionLayer:
-            return create_convolution_layer(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
-        case NodeType::DepthConcatenateLayer:
-            return create_depth_concatenate_layer(*polymorphic_downcast<DepthConcatenateLayerNode *>(node));
+            return detail::create_convolution_layer<NEConvolutionLayerFunctions, NETargetInfo>(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
+        case NodeType::DeconvolutionLayer:
+            return detail::create_deconvolution_layer<NEDeconvolutionLayer, NETargetInfo>(*polymorphic_downcast<DeconvolutionLayerNode *>(node), ctx);
+        case NodeType::ConcatenateLayer:
+            return detail::create_concatenate_layer<NEConcatenateLayer, NETargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
         case NodeType::DepthwiseConvolutionLayer:
-            return create_depthwise_convolution_layer(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+            return detail::create_depthwise_convolution_layer<NEDepthwiseConvolutionLayerFunctions, NETargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
         case NodeType::EltwiseLayer:
-            return create_eltwise_layer(*polymorphic_downcast<EltwiseLayerNode *>(node));
+            return detail::create_eltwise_layer<NEEltwiseFunctions, NETargetInfo>(*polymorphic_downcast<EltwiseLayerNode *>(node));
         case NodeType::FlattenLayer:
-            return create_flatten_layer(*polymorphic_downcast<FlattenLayerNode *>(node));
+            return detail::create_flatten_layer<NEFlattenLayer, NETargetInfo>(*polymorphic_downcast<FlattenLayerNode *>(node));
         case NodeType::FullyConnectedLayer:
-            return create_fully_connected_layer(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
+            return detail::create_fully_connected_layer<NEFullyConnectedLayer, NETargetInfo>(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
         case NodeType::NormalizationLayer:
-            return create_normalization_layer(*polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
+            return detail::create_normalization_layer<NENormalizationLayer, NETargetInfo>(*polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
+        case NodeType::PermuteLayer:
+            return detail::create_permute_layer<NEPermute, NETargetInfo>(*polymorphic_downcast<PermuteLayerNode *>(node));
         case NodeType::PoolingLayer:
-            return create_pooling_layer(*polymorphic_downcast<PoolingLayerNode *>(node));
+            return detail::create_pooling_layer<NEPoolingLayer, NETargetInfo>(*polymorphic_downcast<PoolingLayerNode *>(node));
         case NodeType::ReshapeLayer:
-            return create_reshape_layer(*polymorphic_downcast<ReshapeLayerNode *>(node));
+            return detail::create_reshape_layer<NEReshapeLayer, NETargetInfo>(*polymorphic_downcast<ReshapeLayerNode *>(node));
+        case NodeType::ResizeLayer:
+            return detail::create_resize_layer<NEScale, NETargetInfo>(*polymorphic_downcast<ResizeLayerNode *>(node));
         case NodeType::SoftmaxLayer:
-            return create_softmax_layer(*polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
+            return detail::create_softmax_layer<NESoftmaxLayer, NETargetInfo>(*polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
         default:
             return nullptr;
     }
 }
 } // namespace backends
 } // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/backends/NEON/NENodeValidator.cpp b/src/graph/backends/NEON/NENodeValidator.cpp
index e438e79..58ffaf0 100644
--- a/src/graph/backends/NEON/NENodeValidator.cpp
+++ b/src/graph/backends/NEON/NENodeValidator.cpp
@@ -47,6 +47,8 @@
     NodeType type = node->type();
     switch(type)
     {
+        case NodeType::ChannelShuffleLayer:
+            return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : ChannelShuffleLayer");
         case NodeType::ConvolutionLayer:
             return detail::validate_convolution_layer<NEConvolutionLayer,
                    NEDirectConvolutionLayer,
@@ -55,7 +57,8 @@
         case NodeType::DepthwiseConvolutionLayer:
             return detail::validate_depthwise_convolution_layer<NEDepthwiseConvolutionLayer,
                    NEDepthwiseConvolutionLayer3x3>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
-
+        case NodeType::PermuteLayer:
+            return detail::validate_permute_layer<NEPermute>(*polymorphic_downcast<PermuteLayerNode *>(node));
         default:
             return Status{};
     }
diff --git a/src/graph/detail/ExecutionHelpers.cpp b/src/graph/detail/ExecutionHelpers.cpp
index c370fdf..f479963 100644
--- a/src/graph/detail/ExecutionHelpers.cpp
+++ b/src/graph/detail/ExecutionHelpers.cpp
@@ -35,14 +35,6 @@
 {
 namespace detail
 {
-void default_initialize_backends()
-{
-    for(const auto &backend : backends::BackendRegistry::get().backends())
-    {
-        backend.second->initialize_backend();
-    }
-}
-
 void validate_all_nodes(Graph &g)
 {
     auto &nodes = g.nodes();
@@ -52,10 +44,9 @@
     {
         if(node != nullptr)
         {
-            Target assigned_target = node->assigned_target();
-            auto   backend         = backends::BackendRegistry::get().find_backend(assigned_target);
-            ARM_COMPUTE_ERROR_ON_MSG(!backend, "Requested backend doesn't exist!");
-            Status status = backend->validate_node(*node);
+            Target                    assigned_target = node->assigned_target();
+            backends::IDeviceBackend &backend         = backends::BackendRegistry::get().get_backend(assigned_target);
+            Status                    status          = backend.validate_node(*node);
             ARM_COMPUTE_ERROR_ON_MSG(!bool(status), status.error_description().c_str());
         }
     }
@@ -67,13 +58,12 @@
 
     for(auto &tensor : tensors)
     {
-        if(tensor)
+        if(tensor && tensor->handle() == nullptr)
         {
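+            // Create a backend handle only for tensors that do not already have one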
-            Target target  = tensor->desc().target;
-            auto   backend = backends::BackendRegistry::get().find_backend(target);
-            ARM_COMPUTE_ERROR_ON_MSG(!backend, "Requested backend doesn't exist!");
-            auto handle = backend->create_tensor(*tensor);
-            ARM_COMPUTE_ERROR_ON_MSG(!backend, "Couldn't create backend handle!");
+            Target                         target  = tensor->desc().target;
+            backends::IDeviceBackend      &backend = backends::BackendRegistry::get().get_backend(target);
+            std::unique_ptr<ITensorHandle> handle  = backend.create_tensor(*tensor);
+            ARM_COMPUTE_ERROR_ON_MSG(!handle, "Couldn't create backend handle!");
             tensor->set_handle(std::move(handle));
         }
     }
@@ -139,35 +129,33 @@
     }
 }
 
-ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx)
+ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx, const std::vector<NodeID> &node_order)
 {
     ExecutionWorkload workload;
     workload.graph = &g;
     workload.ctx   = &ctx;
 
-    auto &nodes = g.nodes();
-
     // Create tasks
-    for(auto &node : nodes)
+    for(auto &node_id : node_order)
     {
+        auto node = g.node(node_id);
         if(node != nullptr)
         {
-            Target assigned_target = node->assigned_target();
-            auto   backend         = backends::BackendRegistry::get().find_backend(assigned_target);
-            ARM_COMPUTE_ERROR_ON_MSG(!backend, "Requested backend doesn't exist!");
-            auto func = backend->configure_node(*node, ctx);
+            Target                     assigned_target = node->assigned_target();
+            backends::IDeviceBackend  &backend         = backends::BackendRegistry::get().get_backend(assigned_target);
+            std::unique_ptr<IFunction> func            = backend.configure_node(*node, ctx);
             if(func != nullptr)
             {
                 ExecutionTask task;
                 task.task = std::move(func);
-                task.node = node.get();
+                task.node = node;
                 workload.tasks.push_back(std::move(task));
             }
         }
     }
 
     // Add inputs and outputs
-    for(auto &node : nodes)
+    for(auto &node : g.nodes())
     {
         if(node != nullptr && node->type() == NodeType::Input)
         {
@@ -214,15 +202,12 @@
     }
 }
 
-void call_all_input_node_accessors(ExecutionWorkload &workload)
+bool call_all_input_node_accessors(ExecutionWorkload &workload)
 {
-    for(auto &input : workload.inputs)
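+    // Valid only if every input tensor exists and its accessor succeeds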
+    return !std::any_of(std::begin(workload.inputs), std::end(workload.inputs), [](Tensor * input_tensor)
     {
-        if(input != nullptr)
-        {
-            input->call_accessor();
-        }
-    }
+        return (input_tensor == nullptr) || !input_tensor->call_accessor();
+    });
 }
 
 void prepare_all_tasks(ExecutionWorkload &workload)
@@ -264,16 +249,16 @@
     }
 }
 
-void call_all_output_node_accessors(ExecutionWorkload &workload)
+bool call_all_output_node_accessors(ExecutionWorkload &workload)
 {
-    for(auto &output : workload.outputs)
+    bool is_valid = true;
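+    // A null tensor or failing accessor invalidates the workload (later accessors are then skipped)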
+    std::for_each(std::begin(workload.outputs), std::end(workload.outputs), [&](Tensor * output_tensor)
     {
-        if(output != nullptr)
-        {
-            output->call_accessor();
-        }
-    }
+        is_valid = is_valid && (output_tensor != nullptr) && output_tensor->call_accessor();
+    });
+
+    return is_valid;
 }
 } // namespace detail
 } // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/frontend/Stream.cpp b/src/graph/frontend/Stream.cpp
index 96a166c..878d688 100644
--- a/src/graph/frontend/Stream.cpp
+++ b/src/graph/frontend/Stream.cpp
@@ -33,7 +33,7 @@
 namespace frontend
 {
 Stream::Stream(size_t id, std::string name)
-    : _manager(), _ctx(), _g(id, std::move(name))
+    : _ctx(), _manager(), _g(id, std::move(name))
 {
 }
 
@@ -66,4 +66,4 @@
 }
 } // namespace frontend
 } // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/mutators/DepthConcatSubTensorMutator.cpp b/src/graph/mutators/DepthConcatSubTensorMutator.cpp
index c56f4c5..a170c4d 100644
--- a/src/graph/mutators/DepthConcatSubTensorMutator.cpp
+++ b/src/graph/mutators/DepthConcatSubTensorMutator.cpp
@@ -25,8 +25,10 @@
 
 #include "arm_compute/graph/Graph.h"
 #include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/Utils.h"
+#include "arm_compute/graph/algorithms/TopologicalSort.h"
 #include "arm_compute/graph/backends/BackendRegistry.h"
-#include "arm_compute/graph/nodes/DepthConcatenateLayerNode.h"
+#include "arm_compute/graph/nodes/ConcatenateLayerNode.h"
 
 #include "arm_compute/core/utils/misc/Cast.h"
 #include "arm_compute/core/utils/misc/Iterable.h"
@@ -42,14 +44,31 @@
 
 void DepthConcatSubTensorMutator::mutate(Graph &g)
 {
-    // Should be in reverse order of execution
-    for(auto &node : arm_compute::utils::iterable::reverse_iterate(g.nodes()))
+    // Early exit if no Concatenation layers exist in graph
+    if(g.nodes(NodeType::ConcatenateLayer).empty())
     {
-        if(node && node->type() == NodeType::DepthConcatenateLayer && node->output(0) != nullptr)
+        return;
+    }
+
+    // Perform topological sort
+    std::vector<NodeID> topological_sorted_node_ids = dfs(g);
+
+    // Should be in reverse order of execution
+    for(auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids))
+    {
+        INode *node = g.node(node_id);
+        if(node != nullptr && node->type() == NodeType::ConcatenateLayer && node->output(0) != nullptr)
         {
             // Get output tensor
             auto output_tensor = node->output(0);
 
+            // Check concatenation axis (Sub-tensor optimization is supported for concatenation axis >= 2)
+            auto *concat_node = arm_compute::utils::cast::polymorphic_downcast<ConcatenateLayerNode *>(node);
+            if(output_tensor == nullptr || get_dimension_idx(output_tensor->desc(), concat_node->concatenation_axis()) < 2)
+            {
+                continue;
+            }
+
             // Check that all tensors have the same target and valid inputs
             bool is_valid = std::all_of(node->input_edges().cbegin(), node->input_edges().cend(),
                                         [&](const EdgeID & eid)
@@ -58,7 +77,7 @@
             });
 
             // Create subtensors
-            if(is_valid && backends::BackendRegistry::get().find_backend(output_tensor->desc().target) != nullptr)
+            if(is_valid && is_target_supported(output_tensor->desc().target))
             {
                 ARM_COMPUTE_LOG_GRAPH_VERBOSE("Using sub-tensors for the node with ID : "
                                               << node->id() << " and name : " << node->name() << std::endl);
@@ -69,14 +88,14 @@
                     auto       input_tensor = node->input(i);
                     const auto input_shape  = input_tensor->desc().shape;
 
-                    auto backend = backends::BackendRegistry::get().find_backend(input_tensor->desc().target);
-                    auto handle  = backend->create_subtensor(output_tensor->handle(), input_shape, Coordinates(0, 0, depth), false);
+                    backends::IDeviceBackend      &backend = backends::BackendRegistry::get().get_backend(input_tensor->desc().target);
+                    std::unique_ptr<ITensorHandle> handle  = backend.create_subtensor(output_tensor->handle(), input_shape, Coordinates(0, 0, depth), false);
                     input_tensor->set_handle(std::move(handle));
 
                     depth += input_shape.z();
                 }
 
-                auto *dc_node = arm_compute::utils::cast::polymorphic_downcast<DepthConcatenateLayerNode *>(node.get());
+                auto *dc_node = arm_compute::utils::cast::polymorphic_downcast<ConcatenateLayerNode *>(node);
                 dc_node->set_enabled(false);
             }
         }
diff --git a/src/graph/mutators/GroupedConvolutionMutator.cpp b/src/graph/mutators/GroupedConvolutionMutator.cpp
new file mode 100644
index 0000000..0d65d6a
--- /dev/null
+++ b/src/graph/mutators/GroupedConvolutionMutator.cpp
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/mutators/GroupedConvolutionMutator.h"
+
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/GraphBuilder.h"
+#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/Utils.h"
+#include "arm_compute/graph/backends/BackendRegistry.h"
+#include "arm_compute/graph/nodes/Nodes.h"
+
+#include "arm_compute/core/utils/misc/Cast.h"
+
+#include <set>
+
+namespace arm_compute
+{
+namespace graph
+{
+namespace
+{
+NodeID create_grouped_convolution(Graph &g, const NodeParams &params, NodeIdxPair input, NodeID weights, NodeID bias,
+                                  PadStrideInfo conv_info, ConvolutionMethod method, FastMathHint fast_math_hint, unsigned int num_groups)
+{
+    bool has_bias = (bias != EmptyNodeID);
+
+    // Split input
+    const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]);
+    const unsigned int     input_idx         = get_dimension_idx(input_tensor_desc, DataLayoutDimension::CHANNEL);
+    NodeID                 input_split       = GraphBuilder::add_split_node(g, params, input, num_groups, input_idx);
+
+    // Split weights
+    const TensorDescriptor weights_tensor_desc = get_tensor_descriptor(g, g.node(weights)->outputs()[0]);
+    const unsigned int     batch_idx           = get_dimension_idx(weights_tensor_desc, DataLayoutDimension::BATCHES);
+    NodeID                 weights_split       = GraphBuilder::add_split_node(g, params, { weights, 0 }, num_groups, batch_idx);
+
+    // Split bias
+    NodeID bias_split = EmptyNodeID;
+    if(has_bias)
+    {
+        // One bias slice per group
+        bias_split = GraphBuilder::add_split_node(g, params, { bias, 0 }, num_groups, 0);
+    }
+
+    std::vector<NodeIdxPair> convolution_outputs;
+    for(unsigned int i = 0; i < num_groups; ++i)
+    {
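+        // Each branch convolves a single group, hence num_groups is set to 1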
+        NodeParams group_params = params;
+        NodeID     conv_nid     = g.add_node<ConvolutionLayerNode>(conv_info, 1, method, fast_math_hint);
+        g.add_connection(input_split, i, conv_nid, 0);
+        g.add_connection(weights_split, i, conv_nid, 1);
+        if(has_bias)
+        {
+            g.add_connection(bias_split, i, conv_nid, 2);
+        }
+
+        // Add group name
+        if(!group_params.name.empty())
+        {
+            group_params.name.append("_g" + arm_compute::support::cpp11::to_string(i));
+        }
+
+        // Set node parameters
+        INode *node = g.node(conv_nid);
+        ARM_COMPUTE_ERROR_ON(node == nullptr);
+        node->set_common_node_parameters(group_params);
+
+        convolution_outputs.push_back({ conv_nid, 0 });
+    }
+
+    // Depth concatenate output
+    return GraphBuilder::add_concatenate_node(g, params, convolution_outputs, DataLayoutDimension::CHANNEL);
+}
+} // namespace
+
+const char *GroupedConvolutionMutator::name()
+{
+    return "GroupedConvolutionMutator";
+}
+
+void GroupedConvolutionMutator::mutate(Graph &g)
+{
+    // Early exit if no Convolution layers exist in graph
+    if(g.nodes(NodeType::ConvolutionLayer).empty())
+    {
+        return;
+    }
+
+    // Total nodes (cached so that nodes appended by this mutator are not revisited)
+    size_t total_nodes = g.nodes().size();
+
+    // Iterate over convolution nodes
+    for(unsigned int i = 0; i < total_nodes; ++i)
+    {
+        INode *node = g.node(i);
+        if(node != nullptr && node->type() == NodeType::ConvolutionLayer && arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node)->num_groups() != 1)
+        {
+            // Validate node
+            backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(node->assigned_target());
+            Status                    status  = backend.validate_node(*node);
+
+            // If grouped convolution is not supported
+            if(!bool(status))
+            {
+                // Down-cast node
+                auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node);
+
+                // Get internal convolution info
+                const PadStrideInfo     conv_info       = conv_node->convolution_info();
+                const ConvolutionMethod conv_method     = conv_node->convolution_method();
+                const FastMathHint      fast_math_hint  = conv_node->fast_math_hint();
+                const unsigned int      num_groups      = conv_node->num_groups();
+                const NodeParams        params          = conv_node->common_node_params();
+                const Target            assigned_target = conv_node->assigned_target();
+
+                // Extract node ids
+                const NodeID input_id   = conv_node->input_id(0);
+                const NodeID weights_id = conv_node->input_id(1);
+                const NodeID bias_id    = conv_node->input_id(2);
+
+                // Get driving nodes
+                std::vector<NodeIdxPair> driving_nodes = get_driving_nodes(*node);
+
+                // Extract convolution node accessor if any
+                auto node_accessor = conv_node->output(0)->extract_accessor();
+
+                // Current max tensor and node id
+                TensorID latest_tid = g.tensors().size();
+                NodeID   latest_nid = g.nodes().size();
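+                // (tensors/nodes appended beyond these indices belong to the new sub-graph)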
+
+                // Create grouped convolution node
+                NodeID grouped_conv_id = create_grouped_convolution(g, params, { input_id, 0 }, weights_id, bias_id,
+                                                                    conv_info, conv_method, fast_math_hint, num_groups);
+
+                // Remove convolution node
+                g.remove_node(node->id());
+
+                // Update grouped convolution node outputs
+                for(auto &driving_node : driving_nodes)
+                {
+                    g.add_connection(grouped_conv_id, 0, driving_node.node_id, driving_node.index);
+                }
+
+                // Update accessor to grouped convolution node
+                g.node(grouped_conv_id)->output(0)->set_accessor(std::move(node_accessor));
+
+                // Configure new tensors and nodes
+                std::for_each(g.tensors().begin() + latest_tid, g.tensors().end(), [](std::unique_ptr<Tensor> &t)
+                {
+                    configure_tensor(t.get());
+                });
+                std::for_each(g.nodes().begin() + latest_nid, g.nodes().end(), [&assigned_target](std::unique_ptr<INode> &n)
+                {
+                    if(n != nullptr)
+                    {
+                        n->set_assigned_target(assigned_target);
+                    }
+                });
+            }
+        }
+    }
+}
+} // namespace graph
+} // namespace arm_compute
diff --git a/src/graph/mutators/InPlaceOperationMutator.cpp b/src/graph/mutators/InPlaceOperationMutator.cpp
index bd3f098..31921b3 100644
--- a/src/graph/mutators/InPlaceOperationMutator.cpp
+++ b/src/graph/mutators/InPlaceOperationMutator.cpp
@@ -50,11 +50,26 @@
             // Check if parent has a single output if yes then force in place calculation else not
             if((input_edge != nullptr) && (input_edge->producer() != nullptr) && (input_edge->producer()->output_edges().size() == 1))
             {
-                ARM_COMPUTE_LOG_GRAPH_VERBOSE("Switching to in-place computation for the node with ID : "
-                                              << node->id() << " and name : " << node->name() << std::endl);
-                // Update output
-                auto tensor = input_edge->tensor();
-                node->set_output_tensor(tensor->id(), 0);
+                // Get current and new output tensors
+                auto current_output_tensor = node->output(0);
+                auto new_output_tensor     = input_edge->tensor();
+
+                ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr || new_output_tensor == nullptr);
+
+                // Prevent in-place operation if there is an accessor bound to the in-place tensor
+                if(new_output_tensor->accessor() == nullptr)
+                {
+                    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Switching to in-place computation for the node with ID : "
+                                                  << node->id() << " and name : " << node->name() << std::endl);
+                    // Update accessor
+                    new_output_tensor->set_accessor(current_output_tensor->extract_accessor());
+                    // Update output
+                    node->set_output_tensor(new_output_tensor->id(), 0);
+                }
+                else
+                {
+                    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor\n");
+                }
             }
         }
     }
diff --git a/src/graph/mutators/NodeExecutionMethodMutator.cpp b/src/graph/mutators/NodeExecutionMethodMutator.cpp
new file mode 100644
index 0000000..b420121
--- /dev/null
+++ b/src/graph/mutators/NodeExecutionMethodMutator.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/mutators/NodeExecutionMethodMutator.h"
+
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/Utils.h"
+#include "arm_compute/graph/backends/BackendRegistry.h"
+#include "arm_compute/graph/nodes/Nodes.h"
+
+#include "arm_compute/core/utils/misc/Cast.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+namespace
+{
+/** Runs a default setter function on nodes of a given type
+ *
+ * @tparam Setter Setter function to run
+ *
+ * @param[in, out] g         Graph to extract the nodes from
+ * @param[in]      node_type Node type
+ * @param[in]      setter    Setter function
+ */
+template <typename Setter>
+void set_default_on_invalid_method(Graph &g, NodeType node_type, Setter &&setter)
+{
+    const std::vector<NodeID> &node_ids = g.nodes(node_type);
+    for(auto &node_id : node_ids)
+    {
+        INode *node = g.node(node_id);
+        if(node != nullptr)
+        {
+            // Validate node
+            backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(node->assigned_target());
+            Status                    status  = backend.validate_node(*node);
+
+            // Set default execution method in case of failure
+            if(!bool(status))
+            {
+                setter(node);
+            }
+        }
+    }
+}
+} // namespace
+
+const char *NodeExecutionMethodMutator::name()
+{
+    return "NodeExecutionMethodMutator";
+}
+
+void NodeExecutionMethodMutator::mutate(Graph &g)
+{
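+    // Fall back to the default execution method whenever the backend rejects the configured one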
+    // Convolution Layer
+    set_default_on_invalid_method(g, NodeType::ConvolutionLayer, [](INode * n)
+    {
+        ARM_COMPUTE_LOG_GRAPH_INFO("Switched ConvolutionLayer method of node with ID : "
+                                   << n->id() << " and Name: " << n->name() << std::endl);
+        auto *casted_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(n);
+        casted_node->set_convolution_method(ConvolutionMethod::Default);
+    });
+
+    // Depthwise Convolution Layer
+    set_default_on_invalid_method(g, NodeType::DepthwiseConvolutionLayer, [](INode * n)
+    {
+        ARM_COMPUTE_LOG_GRAPH_INFO("Switched Depthwise ConvolutionLayer method of node with ID : "
+                                   << n->id() << " and Name: " << n->name() << std::endl);
+        auto *casted_node = arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(n);
+        casted_node->set_depthwise_convolution_method(DepthwiseConvolutionMethod::Default);
+    });
+}
+} // namespace graph
+} // namespace arm_compute
diff --git a/src/graph/mutators/NodeFusionMutator.cpp b/src/graph/mutators/NodeFusionMutator.cpp
index 2e893c2..82bfe25 100644
--- a/src/graph/mutators/NodeFusionMutator.cpp
+++ b/src/graph/mutators/NodeFusionMutator.cpp
@@ -25,10 +25,13 @@
 
 #include "arm_compute/graph/Graph.h"
 #include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/Utils.h"
 #include "arm_compute/graph/nodes/Nodes.h"
 
 #include "arm_compute/core/utils/misc/Cast.h"
 
+#include <set>
+
 namespace arm_compute
 {
 namespace graph
@@ -37,6 +40,9 @@
 {
 void fuse_batch_norm_with_activation(Graph &g)
 {
+    // Supported activations when fusing
+    const std::set<Activation> supported_fused_activations = { Activation::RELU, Activation::BOUNDED_RELU, Activation::LU_BOUNDED_RELU };
+
     // Not interested in the order of nodes
     for(auto &node : g.nodes())
     {
@@ -48,34 +54,47 @@
             // Check if following node is an activation layer node
             if((output_edge != nullptr) && (output_edge->consumer() != nullptr) && (output_edge->consumer()->type() == NodeType::ActivationLayer))
             {
-                ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing Batch Normalization node with ID : " << output_edge->producer_id()
-                                              << " with Activation Layer node with ID : " << output_edge->consumer_id() << std::endl);
-
                 auto *bn_node  = arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->producer());
                 auto *act_node = arm_compute::utils::cast::polymorphic_downcast<ActivationLayerNode *>(output_edge->consumer());
 
-                // Get driving nodes of activation node
-                std::vector<NodeIdxPair> act_driving_nodes;
-                for(auto &act_output_edge_id : act_node->output_edges())
+                ARM_COMPUTE_ERROR_ON(act_node->output(0) == nullptr || bn_node->output(0) == nullptr);
+
+                // Check if activation is supported for fusion
+                if(supported_fused_activations.count(act_node->activation_info().activation()) == 0)
                 {
-                    auto act_output_edge = g.edge(act_output_edge_id);
-                    if(act_output_edge != nullptr)
-                    {
-                        ARM_COMPUTE_ERROR_ON(act_output_edge->consumer() == nullptr);
-                        act_driving_nodes.push_back({ act_output_edge->consumer_id(), act_output_edge->consumer_idx() });
-                    }
+                    continue;
                 }
 
-                // Set activation info to batch normalization
-                bn_node->set_fused_activation(act_node->activation_info());
+                ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing Batch Normalization node with ID : " << output_edge->producer_id()
+                                              << " with Activation Layer node with ID : " << output_edge->consumer_id() << std::endl);
 
-                // Remove activation node
-                g.remove_node(act_node->id());
-
-                // Update batch normalization node outputs
-                for(auto &driving_node : act_driving_nodes)
+                // Prevent fusion if batch normalization node has an output accessor
+                if(bn_node->output(0)->accessor() == nullptr)
                 {
-                    g.add_connection(bn_node->id(), 0, driving_node.node_id, driving_node.index);
+                    // Get driving nodes of activation node
+                    std::vector<NodeIdxPair> act_driving_nodes = get_driving_nodes(*act_node);
+
+                    // Set activation info to batch normalization
+                    bn_node->set_fused_activation(act_node->activation_info());
+
+                    // Extract activation node accessor if any
+                    auto act_node_accessor = act_node->output(0)->extract_accessor();
+
+                    // Remove activation node
+                    g.remove_node(act_node->id());
+
+                    // Update batch normalization node outputs
+                    for(auto &driving_node : act_driving_nodes)
+                    {
+                        g.add_connection(bn_node->id(), 0, driving_node.node_id, driving_node.index);
+                    }
+
+                    // Update accessor to batch normalization node
+                    bn_node->output(0)->set_accessor(std::move(act_node_accessor));
+                }
+                else
+                {
+                    ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion as batch normalization node has an output accessor\n");
                 }
             }
         }
diff --git a/src/graph/mutators/SplitLayerSubTensorMutator.cpp b/src/graph/mutators/SplitLayerSubTensorMutator.cpp
index 2a8c029..e21252a 100644
--- a/src/graph/mutators/SplitLayerSubTensorMutator.cpp
+++ b/src/graph/mutators/SplitLayerSubTensorMutator.cpp
@@ -25,6 +25,8 @@
 
 #include "arm_compute/graph/Graph.h"
 #include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/Utils.h"
+#include "arm_compute/graph/algorithms/TopologicalSort.h"
 #include "arm_compute/graph/backends/BackendRegistry.h"
 #include "arm_compute/graph/nodes/SplitLayerNode.h"
 
@@ -42,10 +44,20 @@
 
 void SplitLayerSubTensorMutator::mutate(Graph &g)
 {
-    // Should be in reverse order of execution
-    for(auto &node : arm_compute::utils::iterable::reverse_iterate(g.nodes()))
+    // Early exit if no Split layers exist in graph
+    if(g.nodes(NodeType::SplitLayer).empty())
     {
-        if(node && node->type() == NodeType::SplitLayer && node->input(0) != nullptr)
+        return;
+    }
+
+    // Perform topological sort
+    std::vector<NodeID> topological_sorted_node_ids = dfs(g);
+
+    // Should be in reverse order of execution
+    for(auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids))
+    {
+        INode *node = g.node(node_id);
+        if(node != nullptr && node->type() == NodeType::SplitLayer && node->input(0) != nullptr)
         {
             // Get output tensor
             Tensor *input_tensor = node->input(0);
@@ -58,12 +70,12 @@
             });
 
             // Create subtensors
-            if(is_valid && backends::BackendRegistry::get().find_backend(input_tensor->desc().target) != nullptr)
+            if(is_valid && is_target_supported(input_tensor->desc().target))
             {
                 ARM_COMPUTE_LOG_GRAPH_VERBOSE("Using sub-tensors for the node with ID : "
                                               << node->id() << " and name : " << node->name() << std::endl);
 
-                auto *split_node = arm_compute::utils::cast::polymorphic_downcast<SplitLayerNode *>(node.get());
+                auto *split_node = arm_compute::utils::cast::polymorphic_downcast<SplitLayerNode *>(node);
 
                 const unsigned int axis          = split_node->axis();
                 const unsigned int num_splits    = split_node->num_splits();
@@ -77,8 +89,8 @@
                     Coordinates       coords;
                     std::tie(std::ignore, coords) = SplitLayerNode::compute_output_descriptor(input_tensor->desc(), num_splits, axis, i);
 
-                    backends::IDeviceBackend      *backend = backends::BackendRegistry::get().find_backend(output_tensor->desc().target);
-                    std::unique_ptr<ITensorHandle> handle  = backend->create_subtensor(input_tensor->handle(), output_shape, coords, extend_parent);
+                    backends::IDeviceBackend      &backend = backends::BackendRegistry::get().get_backend(output_tensor->desc().target);
+                    std::unique_ptr<ITensorHandle> handle  = backend.create_subtensor(input_tensor->handle(), output_shape, coords, extend_parent);
                     output_tensor->set_handle(std::move(handle));
                 }
             }
diff --git a/src/graph/nodes/ChannelShuffleLayerNode.cpp b/src/graph/nodes/ChannelShuffleLayerNode.cpp
new file mode 100644
index 0000000..08fcce1
--- /dev/null
+++ b/src/graph/nodes/ChannelShuffleLayerNode.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/nodes/ChannelShuffleLayerNode.h"
+
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/INodeVisitor.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+ChannelShuffleLayerNode::ChannelShuffleLayerNode(unsigned int num_groups)
+    : _num_groups(num_groups)
+{
+    _input_edges.resize(1, EmptyEdgeID);
+    _outputs.resize(1, NullTensorID);
+}
+
+unsigned int ChannelShuffleLayerNode::num_groups() const
+{
+    return _num_groups;
+}
+
+bool ChannelShuffleLayerNode::forward_descriptors()
+{
+    if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+    {
+        Tensor *dst = output(0);
+        ARM_COMPUTE_ERROR_ON(dst == nullptr);
+        dst->desc() = configure_output(0);
+        return true;
+    }
+    return false;
+}
+
+TensorDescriptor ChannelShuffleLayerNode::configure_output(size_t idx) const
+{
+    ARM_COMPUTE_UNUSED(idx);
+    ARM_COMPUTE_ERROR_ON(idx >= _outputs.size());
+
+    const Tensor *src = input(0);
+    ARM_COMPUTE_ERROR_ON(src == nullptr);
+
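+    // Channel shuffle only reorders data; the output descriptor matches the input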
+    return src->desc();
+}
+
+NodeType ChannelShuffleLayerNode::type() const
+{
+    return NodeType::ChannelShuffleLayer;
+}
+
+void ChannelShuffleLayerNode::accept(INodeVisitor &v)
+{
+    v.visit(*this);
+}
+} // namespace graph
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/graph/nodes/DepthConcatenateLayerNode.cpp b/src/graph/nodes/ConcatenateLayerNode.cpp
similarity index 61%
rename from src/graph/nodes/DepthConcatenateLayerNode.cpp
rename to src/graph/nodes/ConcatenateLayerNode.cpp
index 08cccc1..ade3f6e 100644
--- a/src/graph/nodes/DepthConcatenateLayerNode.cpp
+++ b/src/graph/nodes/ConcatenateLayerNode.cpp
@@ -21,58 +21,74 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "arm_compute/graph/nodes/DepthConcatenateLayerNode.h"
+#include "arm_compute/graph/nodes/ConcatenateLayerNode.h"
 
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/graph/Graph.h"
 #include "arm_compute/graph/INodeVisitor.h"
+#include "arm_compute/graph/Utils.h"
+
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
 
 namespace arm_compute
 {
 namespace graph
 {
-DepthConcatenateLayerNode::DepthConcatenateLayerNode(unsigned int total_nodes)
-    : _total_nodes(total_nodes), _is_enabled(true)
+ConcatenateLayerNode::ConcatenateLayerNode(unsigned int total_nodes, DataLayoutDimension axis)
+    : _total_nodes(total_nodes), _axis(axis), _is_enabled(true)
 {
     _input_edges.resize(_total_nodes, EmptyEdgeID);
     _outputs.resize(1, NullTensorID);
 }
 
-void DepthConcatenateLayerNode::set_enabled(bool is_enabled)
+void ConcatenateLayerNode::set_enabled(bool is_enabled)
 {
     _is_enabled = is_enabled;
 }
 
-bool DepthConcatenateLayerNode::is_enabled() const
+bool ConcatenateLayerNode::is_enabled() const
 {
     return _is_enabled;
 }
 
-TensorDescriptor DepthConcatenateLayerNode::compute_output_descriptor(const std::vector<TensorDescriptor> &input_descriptors)
+DataLayoutDimension ConcatenateLayerNode::concatenation_axis() const
+{
+    return _axis;
+}
+
+TensorDescriptor ConcatenateLayerNode::compute_output_descriptor(const std::vector<TensorDescriptor> &input_descriptors,
+                                                                 DataLayoutDimension                  axis)
 {
     ARM_COMPUTE_ERROR_ON(input_descriptors.size() == 0);
 
     TensorDescriptor output_descriptor = input_descriptors[0];
+    const int        axis_idx          = get_dimension_idx(output_descriptor, axis);
 
-    size_t max_x = 0;
-    size_t max_y = 0;
-    size_t depth = 0;
-
-    for(const auto &input_descriptor : input_descriptors)
+    // Extract shapes
+    std::vector<const TensorShape *> shapes;
+    for(auto &input_descriptor : input_descriptors)
     {
-        max_x = std::max(input_descriptor.shape.x(), max_x);
-        max_y = std::max(input_descriptor.shape.y(), max_y);
-        depth += input_descriptor.shape.z();
+        shapes.emplace_back(&input_descriptor.shape);
     }
 
-    output_descriptor.shape.set(0, max_x);
-    output_descriptor.shape.set(1, max_y);
-    output_descriptor.shape.set(2, depth);
+    // Calculate output shape (axis index 0: width, axis index 2: depth/channel)
+    if(axis_idx == 0)
+    {
+        output_descriptor.shape = arm_compute::misc::shape_calculator::calculate_width_concatenate_shape(shapes);
+    }
+    else if(axis_idx == 2)
+    {
+        output_descriptor.shape = arm_compute::misc::shape_calculator::calculate_depth_concatenate_shape(shapes);
+    }
+    else
+    {
+        ARM_COMPUTE_ERROR("Unsupported concatenation axis!");
+    }
 
     return output_descriptor;
 }
 
-bool DepthConcatenateLayerNode::forward_descriptors()
+bool ConcatenateLayerNode::forward_descriptors()
 {
     if(_outputs[0] != NullTensorID)
     {
@@ -84,7 +100,7 @@
     return false;
 }
 
-TensorDescriptor DepthConcatenateLayerNode::configure_output(size_t idx) const
+TensorDescriptor ConcatenateLayerNode::configure_output(size_t idx) const
 {
     ARM_COMPUTE_UNUSED(idx);
     ARM_COMPUTE_ERROR_ON(idx >= _outputs.size());
@@ -106,18 +122,18 @@
             ARM_COMPUTE_ERROR_ON(t == nullptr);
             inputs_descriptors.push_back(t->desc());
         }
-        output_info = compute_output_descriptor(inputs_descriptors);
+        output_info = compute_output_descriptor(inputs_descriptors, _axis);
     }
 
     return output_info;
 }
 
-NodeType DepthConcatenateLayerNode::type() const
+NodeType ConcatenateLayerNode::type() const
 {
-    return NodeType::DepthConcatenateLayer;
+    return NodeType::ConcatenateLayer;
 }
 
-void DepthConcatenateLayerNode::accept(INodeVisitor &v)
+void ConcatenateLayerNode::accept(INodeVisitor &v)
 {
     v.visit(*this);
 }
diff --git a/src/graph/nodes/ConvolutionLayerNode.cpp b/src/graph/nodes/ConvolutionLayerNode.cpp
index 6c31a6b..e9cb039 100644
--- a/src/graph/nodes/ConvolutionLayerNode.cpp
+++ b/src/graph/nodes/ConvolutionLayerNode.cpp
@@ -32,8 +32,12 @@
 {
 namespace graph
 {
-ConvolutionLayerNode::ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method, FastMathHint fast_math_hint, QuantizationInfo out_quant_info)
-    : _info(std::move(info)), _method(method), _fast_math_hint(fast_math_hint), _out_quant_info(out_quant_info)
+ConvolutionLayerNode::ConvolutionLayerNode(PadStrideInfo     info,
+                                           unsigned int      num_groups,
+                                           ConvolutionMethod method,
+                                           FastMathHint      fast_math_hint,
+                                           QuantizationInfo  out_quant_info)
+    : _info(std::move(info)), _num_groups(num_groups), _method(method), _fast_math_hint(fast_math_hint), _out_quant_info(out_quant_info)
 {
     _input_edges.resize(3, EmptyEdgeID);
     _outputs.resize(1, NullTensorID);
@@ -64,6 +68,11 @@
     return _info;
 }
 
+unsigned int ConvolutionLayerNode::num_groups() const
+{
+    return _num_groups;
+}
+
 TensorDescriptor ConvolutionLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
                                                                  const TensorDescriptor &weights_descriptor,
                                                                  const PadStrideInfo    &info)
@@ -125,4 +134,4 @@
     v.visit(*this);
 }
 } // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/nodes/DeconvolutionLayerNode.cpp b/src/graph/nodes/DeconvolutionLayerNode.cpp
new file mode 100644
index 0000000..9329ae3
--- /dev/null
+++ b/src/graph/nodes/DeconvolutionLayerNode.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/nodes/DeconvolutionLayerNode.h"
+
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/INodeVisitor.h"
+#include "arm_compute/graph/Utils.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+DeconvolutionLayerNode::DeconvolutionLayerNode(PadStrideInfo info, Size2D inner_border)
+    : _info(std::move(info)), _inner_border(inner_border)
+{
+    _input_edges.resize(3, EmptyEdgeID);
+    _outputs.resize(1, NullTensorID);
+}
+
+PadStrideInfo DeconvolutionLayerNode::deconvolution_info() const
+{
+    return _info;
+}
+
+Size2D DeconvolutionLayerNode::inner_border() const
+{
+    return _inner_border;
+}
+
+TensorDescriptor DeconvolutionLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
+                                                                   const TensorDescriptor &weights_descriptor,
+                                                                   const PadStrideInfo    &info,
+                                                                   const Size2D           &inner_border)
+{
+    unsigned int output_width  = 0;
+    unsigned int output_height = 0;
+
+    const unsigned int input_width   = get_dimension_size(input_descriptor, DataLayoutDimension::WIDTH);
+    const unsigned int input_height  = get_dimension_size(input_descriptor, DataLayoutDimension::HEIGHT);
+    const unsigned int kernel_width  = get_dimension_size(weights_descriptor, DataLayoutDimension::WIDTH);
+    const unsigned int kernel_height = get_dimension_size(weights_descriptor, DataLayoutDimension::HEIGHT);
+
+    std::tie(output_width, output_height) = deconvolution_output_dimensions(input_width, input_height,
+                                                                            kernel_width, kernel_height,
+                                                                            info.pad().first, info.pad().second,
+                                                                            inner_border.x(), inner_border.y(),
+                                                                            info.stride().first, info.stride().second);
+
+    TensorDescriptor output_descriptor = input_descriptor;
+    output_descriptor.shape.set(get_dimension_idx(output_descriptor, DataLayoutDimension::WIDTH), output_width);
+    output_descriptor.shape.set(get_dimension_idx(output_descriptor, DataLayoutDimension::HEIGHT), output_height);
+    output_descriptor.shape.set(get_dimension_idx(output_descriptor, DataLayoutDimension::CHANNEL), weights_descriptor.shape[3]);
+
+    return output_descriptor;
+}
+
+bool DeconvolutionLayerNode::forward_descriptors()
+{
+    if((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (output_id(0) != NullTensorID))
+    {
+        Tensor *dst = output(0);
+        ARM_COMPUTE_ERROR_ON(dst == nullptr);
+        dst->desc() = configure_output(0);
+        return true;
+    }
+    return false;
+}
+
+TensorDescriptor DeconvolutionLayerNode::configure_output(size_t idx) const
+{
+    ARM_COMPUTE_UNUSED(idx);
+    const Tensor *src     = input(0);
+    const Tensor *weights = input(1);
+
+    ARM_COMPUTE_ERROR_ON(src == nullptr || weights == nullptr);
+
+    TensorDescriptor output_info = compute_output_descriptor(src->desc(), weights->desc(), _info, _inner_border);
+    return output_info;
+}
+
+NodeType DeconvolutionLayerNode::type() const
+{
+    return NodeType::DeconvolutionLayer;
+}
+
+void DeconvolutionLayerNode::accept(INodeVisitor &v)
+{
+    v.visit(*this);
+}
+} // namespace graph
+} // namespace arm_compute
\ No newline at end of file
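
compute_output_descriptor above delegates the spatial arithmetic to deconvolution_output_dimensions. A standalone sketch under the common transposed-convolution formula (an assumption; the library's exact rounding may differ):

#include <cassert>

// Sketch of the usual transposed-convolution output size, per axis:
// out = stride * (in - 1) + kernel + inner_border - 2 * pad
// This is illustrative, not the library routine.
unsigned int deconv_out_dim(unsigned int in, unsigned int kernel,
                            unsigned int pad, unsigned int inner_border,
                            unsigned int stride)
{
    assert(in > 0 && stride > 0);
    assert(stride * (in - 1) + kernel + inner_border >= 2 * pad); // Avoid underflow
    return stride * (in - 1) + kernel + inner_border - 2 * pad;
}

// e.g. in=14, kernel=4, pad=1, inner_border=0, stride=2 -> 2*13 + 4 - 2 = 28
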
diff --git a/src/graph/nodes/DummyNode.cpp b/src/graph/nodes/DummyNode.cpp
new file mode 100644
index 0000000..e641181
--- /dev/null
+++ b/src/graph/nodes/DummyNode.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/nodes/DummyNode.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/INodeVisitor.h"
+#include "arm_compute/graph/Tensor.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+DummyNode::DummyNode(TensorShape shape)
+    : _shape(shape)
+{
+    _input_edges.resize(1, EmptyEdgeID);
+    _outputs.resize(1, NullTensorID);
+}
+
+bool DummyNode::forward_descriptors()
+{
+    if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+    {
+        Tensor *dst = output(0);
+        ARM_COMPUTE_ERROR_ON(dst == nullptr);
+        dst->desc() = configure_output(0);
+        return true;
+    }
+    return false;
+}
+
+TensorDescriptor DummyNode::configure_output(size_t idx) const
+{
+    ARM_COMPUTE_UNUSED(idx);
+    ARM_COMPUTE_ERROR_ON(idx >= _outputs.size());
+
+    const Tensor *src = input(0);
+    ARM_COMPUTE_ERROR_ON(src == nullptr);
+
+    TensorDescriptor output_desc = src->desc();
+    output_desc.shape            = _shape;
+
+    return output_desc;
+}
+
+NodeType DummyNode::type() const
+{
+    return NodeType::Dummy;
+}
+
+void DummyNode::accept(INodeVisitor &v)
+{
+    v.visit(*this);
+}
+} // namespace graph
+} // namespace arm_compute
\ No newline at end of file
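
DummyNode's contract is simply to forward the input descriptor with the shape overridden, which makes it useful as a placeholder while exercising graph plumbing. A standalone restatement with stand-in types:

#include <array>
#include <string>

// Stand-in for TensorDescriptor: shape plus forwarded metadata.
struct Desc
{
    std::array<size_t, 4> shape;
    std::string           data_type;
    std::string           layout;
};

Desc dummy_configure_output(const Desc &src, const std::array<size_t, 4> &fixed_shape)
{
    Desc out  = src;         // Data type, layout, etc. are forwarded untouched
    out.shape = fixed_shape; // Only the shape is replaced
    return out;
}
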
diff --git a/src/graph/nodes/FullyConnectedLayer.cpp b/src/graph/nodes/FullyConnectedLayer.cpp
index d94a785..6ea0292 100644
--- a/src/graph/nodes/FullyConnectedLayer.cpp
+++ b/src/graph/nodes/FullyConnectedLayer.cpp
@@ -31,15 +31,17 @@
 {
 namespace graph
 {
-FullyConnectedLayerNode::FullyConnectedLayerNode(unsigned int num_outputs)
-    : _num_outputs(num_outputs)
+FullyConnectedLayerNode::FullyConnectedLayerNode(unsigned int num_outputs, QuantizationInfo out_quant_info, FullyConnectedLayerInfo fc_info)
+    : _num_outputs(num_outputs), _out_quant_info(out_quant_info), _info(fc_info)
 {
     _input_edges.resize(3, EmptyEdgeID);
     _outputs.resize(1, NullTensorID);
 }
 
 TensorDescriptor FullyConnectedLayerNode::compute_weights_descriptor(const TensorDescriptor &input_descriptor,
-                                                                     unsigned int            num_outputs)
+                                                                     unsigned int            num_outputs,
+                                                                     FullyConnectedLayerInfo fc_info,
+                                                                     QuantizationInfo        weights_quant_info)
 {
     unsigned int num_weights    = 1;
     unsigned int num_dimensions = input_descriptor.shape.num_dimensions();
@@ -56,11 +58,24 @@
     TensorDescriptor weights_descriptor = input_descriptor;
     weights_descriptor.shape            = TensorShape(num_weights, num_outputs);
 
+    // If transpose_weights is not set, the weights arrive already transposed
+    if(!fc_info.transpose_weights)
+    {
+        weights_descriptor.shape = TensorShape(num_outputs, num_weights);
+    }
+
+    // Set quantization info if present
+    if(!weights_quant_info.empty())
+    {
+        weights_descriptor.quant_info = weights_quant_info;
+    }
+
     return weights_descriptor;
 }
 
 TensorDescriptor FullyConnectedLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
-                                                                    unsigned int            num_outputs)
+                                                                    unsigned int            num_outputs,
+                                                                    QuantizationInfo        out_quant_info)
 {
     // Note: Only 1D batch space is supported at the moment
     unsigned int batches = input_descriptor.shape[1];
@@ -69,12 +84,24 @@
         batches = input_descriptor.shape[3];
     }
 
+    // Set descriptor shape
     TensorDescriptor output_descriptor = input_descriptor;
     output_descriptor.shape            = TensorShape(num_outputs, batches);
 
+    // Set quantization info if present
+    if(!out_quant_info.empty())
+    {
+        output_descriptor.quant_info = out_quant_info;
+    }
+
     return output_descriptor;
 }
 
+FullyConnectedLayerInfo FullyConnectedLayerNode::info() const
+{
+    return _info;
+}
+
 bool FullyConnectedLayerNode::forward_descriptors()
 {
     if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
@@ -93,7 +120,7 @@
     const Tensor *src = input(0);
     ARM_COMPUTE_ERROR_ON(src == nullptr);
 
-    return compute_output_descriptor(src->desc(), _num_outputs);
+    return compute_output_descriptor(src->desc(), _num_outputs, _out_quant_info);
 }
 
 NodeType FullyConnectedLayerNode::type() const
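
compute_weights_descriptor now derives the weights shape from both num_outputs and the transpose flag. A standalone sketch of that selection, assuming the batch is the trailing input dimension (container types stand in for TensorShape):

#include <cstddef>
#include <utility>
#include <vector>

// Sketch of the weights-shape selection; illustrative, not arm_compute API.
std::pair<size_t, size_t> fc_weights_shape(const std::vector<size_t> &input_shape,
                                           size_t num_outputs,
                                           bool   transpose_weights)
{
    // Flatten every dimension except the trailing batch dimension
    size_t num_weights = 1;
    for(size_t i = 0; i + 1 < input_shape.size(); ++i)
    {
        num_weights *= input_shape[i];
    }
    // transpose_weights set   -> weights supplied as (num_weights, num_outputs)
    // transpose_weights unset -> weights arrive already transposed
    return transpose_weights ? std::make_pair(num_weights, num_outputs)
                             : std::make_pair(num_outputs, num_weights);
}

// e.g. input {7, 7, 512, N} with num_outputs = 4096: num_weights = 25088,
// so the descriptor is (25088, 4096), or (4096, 25088) if already transposed.
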
diff --git a/src/graph/nodes/PermuteLayerNode.cpp b/src/graph/nodes/PermuteLayerNode.cpp
new file mode 100644
index 0000000..042ec09
--- /dev/null
+++ b/src/graph/nodes/PermuteLayerNode.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/nodes/PermuteLayerNode.h"
+
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/INodeVisitor.h"
+
+#include "arm_compute/core/Helpers.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+PermuteLayerNode::PermuteLayerNode(PermutationVector perm, DataLayout layout)
+    : _perm(perm), _layout(layout)
+{
+    _input_edges.resize(1, EmptyEdgeID);
+    _outputs.resize(1, NullTensorID);
+}
+
+const PermutationVector &PermuteLayerNode::permutation_vector() const
+{
+    return _perm;
+}
+
+bool PermuteLayerNode::forward_descriptors()
+{
+    if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+    {
+        Tensor *dst = output(0);
+        ARM_COMPUTE_ERROR_ON(dst == nullptr);
+        dst->desc() = configure_output(0);
+        return true;
+    }
+    return false;
+}
+
+TensorDescriptor PermuteLayerNode::configure_output(size_t idx) const
+{
+    ARM_COMPUTE_UNUSED(idx);
+    ARM_COMPUTE_ERROR_ON(idx >= _outputs.size());
+
+    const Tensor *src = input(0);
+    ARM_COMPUTE_ERROR_ON(src == nullptr);
+
+    TensorDescriptor output_desc = src->desc();
+    permute(output_desc.shape, _perm);
+    if(_layout != DataLayout::UNKNOWN)
+    {
+        output_desc.layout = _layout;
+    }
+
+    return output_desc;
+}
+
+NodeType PermuteLayerNode::type() const
+{
+    return NodeType::PermuteLayer;
+}
+
+void PermuteLayerNode::accept(INodeVisitor &v)
+{
+    v.visit(*this);
+}
+} // namespace graph
+} // namespace arm_compute
\ No newline at end of file
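
configure_output applies the permutation vector to the shape and optionally rewrites the layout tag. A standalone sketch of the permutation step, assuming the dst[i] = src[perm[i]] convention (the library's convention may differ; this is illustrative only):

#include <cstddef>
#include <vector>

// Sketch: reorder a shape according to a permutation vector,
// assuming dst[i] = src[perm[i]].
std::vector<size_t> permute_shape(const std::vector<size_t> &src,
                                  const std::vector<size_t> &perm)
{
    std::vector<size_t> dst(src.size());
    for(size_t i = 0; i < perm.size() && i < src.size(); ++i)
    {
        dst[i] = src[perm[i]];
    }
    return dst;
}

// e.g. src = {W, H, C} with perm = {2, 0, 1} -> {C, W, H}
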
diff --git a/src/graph/nodes/ResizeLayerNode.cpp b/src/graph/nodes/ResizeLayerNode.cpp
new file mode 100644
index 0000000..a6aa7bf
--- /dev/null
+++ b/src/graph/nodes/ResizeLayerNode.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/nodes/ResizeLayerNode.h"
+
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/INodeVisitor.h"
+#include "arm_compute/graph/Utils.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+ResizeLayerNode::ResizeLayerNode(InterpolationPolicy policy, float scale_width, float scale_height)
+    : _policy(policy), _scale_width(scale_width), _scale_height(scale_height)
+{
+    _input_edges.resize(1, EmptyEdgeID);
+    _outputs.resize(1, NullTensorID);
+}
+
+InterpolationPolicy ResizeLayerNode::policy() const
+{
+    return _policy;
+}
+
+std::pair<float, float> ResizeLayerNode::scaling_factor() const
+{
+    return std::make_pair(_scale_width, _scale_height);
+}
+
+bool ResizeLayerNode::forward_descriptors()
+{
+    if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+    {
+        Tensor *dst = output(0);
+        ARM_COMPUTE_ERROR_ON(dst == nullptr);
+        dst->desc() = configure_output(0);
+        return true;
+    }
+    return false;
+}
+
+TensorDescriptor ResizeLayerNode::configure_output(size_t idx) const
+{
+    ARM_COMPUTE_UNUSED(idx);
+    ARM_COMPUTE_ERROR_ON(idx >= _outputs.size());
+
+    const Tensor *src = input(0);
+    ARM_COMPUTE_ERROR_ON(src == nullptr);
+
+    TensorDescriptor output_desc = src->desc();
+    size_t           width_idx   = get_dimension_idx(output_desc, DataLayoutDimension::WIDTH);
+    size_t           height_idx  = get_dimension_idx(output_desc, DataLayoutDimension::HEIGHT);
+    output_desc.shape.set(width_idx, static_cast<int>(output_desc.shape[width_idx] * _scale_width));
+    output_desc.shape.set(height_idx, static_cast<int>(output_desc.shape[height_idx] * _scale_height));
+
+    return output_desc;
+}
+
+NodeType ResizeLayerNode::type() const
+{
+    return NodeType::ResizeLayer;
+}
+
+void ResizeLayerNode::accept(INodeVisitor &v)
+{
+    v.visit(*this);
+}
+} // namespace graph
+} // namespace arm_compute
\ No newline at end of file
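
Note the static_cast<int> in configure_output above: scaled extents are truncated rather than rounded. A small self-contained demonstration:

#include <cstdio>

// Mirrors the truncation behaviour in ResizeLayerNode::configure_output:
// the scaled extent is cast to int, so 0.5 * 225 = 112.5 becomes 112.
unsigned int scaled_dim(unsigned int dim, float scale)
{
    return static_cast<unsigned int>(static_cast<int>(dim * scale));
}

int main()
{
    std::printf("%u\n", scaled_dim(224, 0.5f)); // 112
    std::printf("%u\n", scaled_dim(225, 0.5f)); // 112 (truncated, not rounded)
    return 0;
}
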
diff --git a/src/graph/printers/DotGraphPrinter.cpp b/src/graph/printers/DotGraphPrinter.cpp
index 61cf423..ef156ea 100644
--- a/src/graph/printers/DotGraphPrinter.cpp
+++ b/src/graph/printers/DotGraphPrinter.cpp
@@ -47,6 +47,15 @@
     _info = ss.str();
 }
 
+void DotGraphVisitor::visit(ConcatenateLayerNode &n)
+{
+    std::stringstream ss;
+    ss << "Enabled: " << n.is_enabled();
+    ss << R"( \n )";
+    ss << "Axis: " << n.concatenation_axis();
+    _info = ss.str();
+}
+
 void DotGraphVisitor::visit(ConvolutionLayerNode &n)
 {
     std::stringstream ss;
@@ -54,13 +63,6 @@
     _info = ss.str();
 }
 
-void DotGraphVisitor::visit(DepthConcatenateLayerNode &n)
-{
-    std::stringstream ss;
-    ss << "Enabled: " << n.is_enabled();
-    _info = ss.str();
-}
-
 void DotGraphVisitor::visit(DepthwiseConvolutionLayerNode &n)
 {
     std::stringstream ss;