arm_compute v18.08
diff --git a/src/graph/Graph.cpp b/src/graph/Graph.cpp
index e1ffeed..88e2682 100644
--- a/src/graph/Graph.cpp
+++ b/src/graph/Graph.cpp
@@ -41,17 +41,24 @@
std::unique_ptr<INode> &node = _nodes[nid];
- // Remove node connections
if(node)
{
+ // Remove input connections
for(auto &input_eid : node->_input_edges)
{
remove_connection(input_eid);
}
- for(auto &outpud_eid : node->_output_edges)
+
+ // Remove output connections
+ std::set<EdgeID> output_edges_copy = node->output_edges();
+    for(auto &output_eid : output_edges_copy)
{
remove_connection(output_eid);
}
+
+ // Remove nid from tagged nodes
+ std::vector<NodeID> &tnodes = _tagged_nodes.at(node->type());
+ tnodes.erase(std::remove(tnodes.begin(), tnodes.end(), nid), tnodes.end());
}
node = nullptr;
@@ -164,9 +171,9 @@
return _id;
}
-const std::vector<NodeID> &Graph::inputs()
+const std::vector<NodeID> &Graph::nodes(NodeType type)
{
- return _tagged_nodes[NodeType::Input];
+ return _tagged_nodes[type];
}
std::vector<std::unique_ptr<INode>> &Graph::nodes()
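
Note on the remove_node() change above: remove_connection() erases the edge id from the producer's _output_edges set, so iterating the live set while removing would invalidate the iterator; the added code therefore walks a snapshot (output_edges_copy). Untagging the node from _tagged_nodes uses the standard erase-remove idiom. Both patterns as a minimal standalone sketch (hypothetical names, not the library API):

    #include <algorithm>
    #include <set>
    #include <vector>

    // Iterate a copy because removal mutates the live set.
    void remove_all_edges(std::set<unsigned> &edges)
    {
        const std::set<unsigned> snapshot = edges;
        for(unsigned eid : snapshot)
        {
            edges.erase(eid); // safe: 'snapshot' is untouched
        }
    }

    // Erase-remove idiom: drop every occurrence of 'nid' from the vector.
    void untag_node(std::vector<unsigned> &tagged, unsigned nid)
    {
        tagged.erase(std::remove(tagged.begin(), tagged.end(), nid), tagged.end());
    }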
diff --git a/src/graph/GraphBuilder.cpp b/src/graph/GraphBuilder.cpp
index 4c5d30a..81a18c4 100644
--- a/src/graph/GraphBuilder.cpp
+++ b/src/graph/GraphBuilder.cpp
@@ -25,9 +25,11 @@
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/Utils.h"
-#include "arm_compute/graph/algorithms/BFS.h"
+#include "arm_compute/graph/algorithms/TopologicalSort.h"
#include "arm_compute/graph/nodes/Nodes.h"
+#include "support/ToolchainSupport.h"
+
#define CHECK_NODEIDX_PAIR(pair, g) \
ARM_COMPUTE_ERROR_ON(((pair).node_id >= (g).nodes().size()) || ((g).node((pair).node_id) == nullptr) || ((pair).index >= (g).node((pair).node_id)->num_outputs()));
@@ -79,43 +81,6 @@
return nid;
}
-
-NodeID create_grouped_convolution(Graph &g, NodeParams &params, NodeIdxPair input, NodeID weights, NodeID bias,
- PadStrideInfo conv_info, ConvolutionMethod method, FastMathHint fast_math_hint, unsigned int num_groups)
-{
- bool has_bias = (bias != EmptyNodeID);
-
- // Split input
- NodeID input_split = GraphBuilder::add_split_node(g, params, input, num_groups, 2);
-
- // Split weights
- NodeID weights_split = GraphBuilder::add_split_node(g, params, { weights, 0 }, num_groups, 3);
-
- // Split bias
- NodeID bias_split = EmptyNodeID;
- if(has_bias)
- {
- // Split bias
- bias_split = GraphBuilder::add_split_node(g, params, { bias, 0 }, num_groups, 0);
- }
-
- std::vector<NodeIdxPair> convolution_outputs;
- for(unsigned int i = 0; i < num_groups; ++i)
- {
- NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, method, fast_math_hint);
- g.add_connection(input_split, i, conv_nid, 0);
- g.add_connection(weights_split, i, conv_nid, 1);
- if(has_bias)
- {
- g.add_connection(bias_split, i, conv_nid, 2);
- }
- set_node_params(g, conv_nid, params);
- convolution_outputs.push_back({ conv_nid, 0 });
- }
-
- // Depth concatenate output
- return GraphBuilder::add_depth_concatenate_node(g, params, convolution_outputs);
-}
} // namespace
NodeID GraphBuilder::add_const_node(Graph &g, NodeParams params, TensorDescriptor desc, ITensorAccessorUPtr accessor)
@@ -203,6 +168,11 @@
return batch_norm_nid;
}
+NodeID GraphBuilder::add_channel_shuffle_node(Graph &g, NodeParams params, NodeIdxPair input, unsigned int num_groups)
+{
+ return create_simple_single_input_output_node<ChannelShuffleLayerNode>(g, params, input, num_groups);
+}
+
NodeID GraphBuilder::add_convolution_node(Graph &g, NodeParams params, NodeIdxPair input,
Size2D kernel_spatial_extend, unsigned int depth, PadStrideInfo conv_info,
unsigned int num_groups, ConvolutionMethod method, FastMathHint fast_math_hint,
@@ -239,34 +209,81 @@
{
TensorDescriptor b_desc = input_tensor_desc;
b_desc.shape = TensorShape(depth);
- b_nid = add_const_node_with_name(g, params, "Bias", b_desc, std::move(bias_accessor));
- }
-
- if(num_groups == 1)
- {
- // Create convolution node and connect
- NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, method, fast_math_hint, out_quant_info);
- g.add_connection(input.node_id, input.index, conv_nid, 0);
- g.add_connection(w_nid, 0, conv_nid, 1);
- if(has_bias)
+ if(is_data_type_quantized_asymmetric(input_tensor_desc.data_type))
{
- g.add_connection(b_nid, 0, conv_nid, 2);
+ b_desc.data_type = DataType::S32;
}
- set_node_params(g, conv_nid, params);
+ b_nid = add_const_node_with_name(g, params, "Bias", b_desc, std::move(bias_accessor));
+ }
- return conv_nid;
- }
- else
+ // Create convolution node and connect
+ NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, num_groups, method, fast_math_hint, out_quant_info);
+ g.add_connection(input.node_id, input.index, conv_nid, 0);
+ g.add_connection(w_nid, 0, conv_nid, 1);
+ if(has_bias)
{
- return create_grouped_convolution(g, params, input, w_nid, b_nid, conv_info, method, fast_math_hint, num_groups);
+ g.add_connection(b_nid, 0, conv_nid, 2);
}
+ set_node_params(g, conv_nid, params);
+
+ return conv_nid;
}
-NodeID GraphBuilder::add_depth_concatenate_node(Graph &g, NodeParams params, std::vector<NodeIdxPair> inputs)
+NodeID GraphBuilder::add_deconvolution_node(Graph &g, NodeParams params, NodeIdxPair input,
+ Size2D kernel_spatial_extend, unsigned int depth, PadStrideInfo deconv_info,
+ Size2D inner_border, ITensorAccessorUPtr weights_accessor,
+ ITensorAccessorUPtr bias_accessor)
+{
+ CHECK_NODEIDX_PAIR(input, g);
+ ARM_COMPUTE_ERROR_ON(depth == 0);
+ ARM_COMPUTE_ERROR_ON((kernel_spatial_extend.width == 0) || (kernel_spatial_extend.height == 0));
+
+ bool has_bias = (bias_accessor != nullptr);
+
+ // Get input tensor descriptor
+ const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]);
+
+ // Create weights node
+ TensorDescriptor w_desc = input_tensor_desc;
+ w_desc.shape.set(get_dimension_idx(input_tensor_desc, DataLayoutDimension::WIDTH), kernel_spatial_extend.width);
+ w_desc.shape.set(get_dimension_idx(input_tensor_desc, DataLayoutDimension::HEIGHT), kernel_spatial_extend.height);
+ w_desc.shape.set(get_dimension_idx(input_tensor_desc, DataLayoutDimension::CHANNEL),
+ get_dimension_size(input_tensor_desc, DataLayoutDimension::CHANNEL));
+ w_desc.shape.set(get_dimension_idx(input_tensor_desc, DataLayoutDimension::BATCHES), depth);
+
+ NodeID w_nid = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor));
+
+ // Create bias nodes
+ NodeID b_nid = EmptyNodeID;
+ if(has_bias)
+ {
+ TensorDescriptor b_desc = input_tensor_desc;
+ b_desc.shape = TensorShape(depth);
+ if(is_data_type_quantized_asymmetric(input_tensor_desc.data_type))
+ {
+ b_desc.data_type = DataType::S32;
+ }
+ b_nid = add_const_node_with_name(g, params, "Bias", b_desc, std::move(bias_accessor));
+ }
+
+    // Create deconvolution node and connect
+ NodeID deconv_nid = g.add_node<DeconvolutionLayerNode>(deconv_info, inner_border);
+ g.add_connection(input.node_id, input.index, deconv_nid, 0);
+ g.add_connection(w_nid, 0, deconv_nid, 1);
+ if(has_bias)
+ {
+ g.add_connection(b_nid, 0, deconv_nid, 2);
+ }
+ set_node_params(g, deconv_nid, params);
+
+ return deconv_nid;
+}
+
+NodeID GraphBuilder::add_concatenate_node(Graph &g, NodeParams params, std::vector<NodeIdxPair> inputs, DataLayoutDimension axis)
{
ARM_COMPUTE_ERROR_ON(inputs.size() == 0);
- NodeID nid = g.add_node<DepthConcatenateLayerNode>(inputs.size());
+ NodeID nid = g.add_node<ConcatenateLayerNode>(inputs.size(), axis);
unsigned int i = 0;
for(const auto &input : inputs)
@@ -309,7 +326,7 @@
if(has_bias)
{
TensorDescriptor b_desc = input_tensor_desc;
- b_desc.shape = TensorShape(b_desc.shape.z());
+ b_desc.shape = TensorShape(get_dimension_size(input_tensor_desc, DataLayoutDimension::CHANNEL));
b_nid = add_const_node_with_name(g, params, "Bias", b_desc, std::move(bias_accessor));
}
@@ -326,6 +343,11 @@
return conv_nid;
}
+NodeID GraphBuilder::add_dummy_node(Graph &g, NodeParams params, NodeIdxPair input, TensorShape shape)
+{
+ return create_simple_single_input_output_node<DummyNode>(g, params, input, shape);
+}
+
NodeID GraphBuilder::add_elementwise_node(Graph &g, NodeParams params, NodeIdxPair input0, NodeIdxPair input1, EltwiseOperation operation)
{
CHECK_NODEIDX_PAIR(input0, g);
@@ -347,7 +369,9 @@
}
NodeID GraphBuilder::add_fully_connected_layer(Graph &g, NodeParams params, NodeIdxPair input, unsigned int num_outputs,
- ITensorAccessorUPtr weights_accessor, ITensorAccessorUPtr bias_accessor)
+ ITensorAccessorUPtr weights_accessor, ITensorAccessorUPtr bias_accessor,
+ const FullyConnectedLayerInfo fc_info,
+ const QuantizationInfo weights_quant_info, const QuantizationInfo out_quant_info)
{
CHECK_NODEIDX_PAIR(input, g);
ARM_COMPUTE_ERROR_ON(num_outputs == 0);
@@ -358,7 +382,7 @@
const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]);
// Create weights node
- TensorDescriptor w_desc = FullyConnectedLayerNode::compute_weights_descriptor(input_tensor_desc, num_outputs);
+ TensorDescriptor w_desc = FullyConnectedLayerNode::compute_weights_descriptor(input_tensor_desc, num_outputs, fc_info, weights_quant_info);
NodeID w_nid = add_const_node_with_name(g, params, "Weights", w_desc, std::move(weights_accessor));
// Create bias nodes
@@ -367,11 +391,15 @@
{
TensorDescriptor b_desc = input_tensor_desc;
b_desc.shape = TensorShape(num_outputs);
- b_nid = add_const_node_with_name(g, params, "Bias", b_desc, std::move(bias_accessor));
+ if(is_data_type_quantized_asymmetric(input_tensor_desc.data_type))
+ {
+ b_desc.data_type = DataType::S32;
+ }
+ b_nid = add_const_node_with_name(g, params, "Bias", b_desc, std::move(bias_accessor));
}
- // Create convolution node and connect
- NodeID fc_nid = g.add_node<FullyConnectedLayerNode>(num_outputs);
+ // Create fully connected node and connect
+ NodeID fc_nid = g.add_node<FullyConnectedLayerNode>(num_outputs, out_quant_info, fc_info);
g.add_connection(input.node_id, input.index, fc_nid, 0);
g.add_connection(w_nid, 0, fc_nid, 1);
if(has_bias)
@@ -389,6 +417,11 @@
return create_simple_single_input_output_node<NormalizationLayerNode>(g, params, input, norm_info);
}
+NodeID GraphBuilder::add_permute_node(Graph &g, NodeParams params, NodeIdxPair input, PermutationVector perm, DataLayout layout)
+{
+ return create_simple_single_input_output_node<PermuteLayerNode>(g, params, input, perm, layout);
+}
+
NodeID GraphBuilder::add_pooling_node(Graph &g, NodeParams params, NodeIdxPair input, PoolingLayerInfo pool_info)
{
return create_simple_single_input_output_node<PoolingLayerNode>(g, params, input, pool_info);
@@ -399,6 +432,12 @@
return create_simple_single_input_output_node<ReshapeLayerNode>(g, params, input, shape);
}
+NodeID GraphBuilder::add_resize_node(Graph &g, NodeParams params, NodeIdxPair input, InterpolationPolicy policy,
+ float width_scale, float height_scale)
+{
+ return create_simple_single_input_output_node<ResizeLayerNode>(g, params, input, policy, width_scale, height_scale);
+}
+
NodeID GraphBuilder::add_scale_layer(Graph &g, const NodeParams &params, NodeIdxPair input, ITensorAccessorUPtr mul_accessor, ITensorAccessorUPtr add_accessor)
{
CHECK_NODEIDX_PAIR(input, g);
@@ -421,9 +460,9 @@
NodeIdxPair add_const_nidxp = { add_const_nid, 0 };
// Create node and connect
- NodeID mul_node = GraphBuilder::add_elementwise_node(g, params, input, mul_const_nidxp, EltwiseOperation::MUL);
+ NodeID mul_node = GraphBuilder::add_elementwise_node(g, params, input, mul_const_nidxp, EltwiseOperation::Mul);
NodeIdxPair mulnode_nidxp = { mul_node, 0 };
- NodeID add_node = GraphBuilder::add_elementwise_node(g, params, mulnode_nidxp, add_const_nidxp, EltwiseOperation::ADD);
+ NodeID add_node = GraphBuilder::add_elementwise_node(g, params, mulnode_nidxp, add_const_nidxp, EltwiseOperation::Add);
return add_node;
}
@@ -438,4 +477,4 @@
return create_simple_single_input_output_node<SplitLayerNode>(g, params, input, num_splits, axis);
}
} // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/GraphContext.cpp b/src/graph/GraphContext.cpp
index 3f31114..5f33ed3 100644
--- a/src/graph/GraphContext.cpp
+++ b/src/graph/GraphContext.cpp
@@ -22,7 +22,9 @@
* SOFTWARE.
*/
#include "arm_compute/graph/GraphContext.h"
-#include <arm_compute/graph.h>
+
+#include "arm_compute/graph.h"
+#include "arm_compute/graph/Utils.h"
namespace arm_compute
{
@@ -33,6 +35,12 @@
{
}
+GraphContext::~GraphContext()
+{
+ _memory_managers.clear();
+ release_default_graph_context(*this);
+}
+
const GraphConfig &GraphContext::config() const
{
return _config;
@@ -82,4 +90,4 @@
}
}
} // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/GraphManager.cpp b/src/graph/GraphManager.cpp
index ad45845..f9d13ac 100644
--- a/src/graph/GraphManager.cpp
+++ b/src/graph/GraphManager.cpp
@@ -27,10 +27,13 @@
#include "arm_compute/graph/GraphContext.h"
#include "arm_compute/graph/Logger.h"
#include "arm_compute/graph/PassManager.h"
+#include "arm_compute/graph/TypePrinter.h"
#include "arm_compute/graph/Utils.h"
#include "arm_compute/graph/detail/CrossLayerMemoryManagerHelpers.h"
#include "arm_compute/graph/detail/ExecutionHelpers.h"
+#include "arm_compute/graph/algorithms/TopologicalSort.h"
+
namespace arm_compute
{
namespace graph
@@ -38,7 +41,6 @@
GraphManager::GraphManager()
: _workloads()
{
- detail::default_initialize_backends();
}
void GraphManager::finalize_graph(Graph &graph, GraphContext &ctx, PassManager &pm, Target target)
@@ -53,7 +55,12 @@
}
// Force target to all graph construct
- Target forced_target = is_target_supported(target) ? target : get_default_target();
+ Target forced_target = target;
+ if(!is_target_supported(target))
+ {
+ forced_target = get_default_target();
+ ARM_COMPUTE_LOG_GRAPH_INFO("Switching target from " << target << " to " << forced_target << std::endl);
+ }
force_target_to_graph(graph, forced_target);
// Configure all tensors
@@ -62,22 +69,22 @@
// Apply all mutating passes
pm.run_all(graph);
+ // Perform topological sort
+ std::vector<NodeID> topological_sorted_nodes = dfs(graph);
+
// Validate all nodes
detail::validate_all_nodes(graph);
// Configure all nodes
- auto workload = detail::configure_all_nodes(graph, ctx);
+ auto workload = detail::configure_all_nodes(graph, ctx, topological_sorted_nodes);
ARM_COMPUTE_ERROR_ON_MSG(workload.tasks.empty(), "Could not configure all nodes!");
// Allocate const tensors and call accessors
detail::allocate_const_tensors(graph);
detail::call_all_const_node_accessors(graph);
- if(forced_target == Target::CL)
- {
- // Prepare graph
- detail::prepare_all_tasks(workload);
- }
+ // Prepare graph
+ detail::prepare_all_tasks(workload);
// Setup tensor memory (Allocate all tensors or setup transition manager)
if(ctx.config().use_transition_memory_manager)
@@ -95,15 +102,6 @@
// Register graph
_workloads.insert(std::make_pair(graph.id(), std::move(workload)));
ARM_COMPUTE_LOG_GRAPH_VERBOSE("Created workload for graph with ID : " << graph.id().get() << std::endl);
-
- if(forced_target != Target::CL)
- {
- // Make first run
- execute_graph(graph);
-
- // Release all unused const tensors
- detail::release_unused_tensors(graph);
- }
}
void GraphManager::execute_graph(Graph &graph)
@@ -112,14 +110,23 @@
auto it = _workloads.find(graph.id());
ARM_COMPUTE_ERROR_ON_MSG(it == std::end(_workloads), "Graph is not registered!");
- // Call input accessors
- detail::call_all_input_node_accessors(it->second);
+ while(true)
+ {
+ // Call input accessors
+ if(!detail::call_all_input_node_accessors(it->second))
+ {
+ return;
+ }
- // Run graph
- detail::call_all_tasks(it->second);
+ // Run graph
+ detail::call_all_tasks(it->second);
- // Call output accessors
- detail::call_all_output_node_accessors(it->second);
+ // Call output accessors
+ if(!detail::call_all_output_node_accessors(it->second))
+ {
+ return;
+ }
+ }
}
void GraphManager::invalidate_graph(Graph &graph)
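
With the loop above, one execute_graph() call now streams data until an accessor ends the run: an input accessor returning false means no more input, an output accessor returning false means the consumer wants to stop. A minimal sketch of an input accessor under that contract, assuming the bool-returning ITensorAccessor::access_tensor() interface (batch filling left as a placeholder):

    #include "arm_compute/graph/ITensorAccessor.h"

    // Feeds a fixed number of batches, then ends the stream by returning false.
    class CountingAccessor final : public arm_compute::graph::ITensorAccessor
    {
    public:
        explicit CountingAccessor(unsigned int num_batches) : _left(num_batches) {}

        bool access_tensor(arm_compute::ITensor &tensor) override
        {
            if(_left == 0)
            {
                return false; // end of stream: execute_graph() returns
            }
            // ... fill 'tensor' with the next batch here ...
            --_left;
            return true;
        }

    private:
        unsigned int _left;
    };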
diff --git a/src/graph/INode.cpp b/src/graph/INode.cpp
index cd9a46a..b0c3137 100644
--- a/src/graph/INode.cpp
+++ b/src/graph/INode.cpp
@@ -185,6 +185,11 @@
return _outputs.size();
}
+NodeParams INode::common_node_params() const
+{
+ return _common_params;
+}
+
Target INode::requested_target() const
{
return _common_params.target;
diff --git a/src/graph/Tensor.cpp b/src/graph/Tensor.cpp
index 287e783..9850128 100644
--- a/src/graph/Tensor.cpp
+++ b/src/graph/Tensor.cpp
@@ -67,6 +67,11 @@
return _accessor.get();
}
+std::unique_ptr<ITensorAccessor> Tensor::extract_accessor()
+{
+ return std::move(_accessor);
+}
+
bool Tensor::call_accessor()
{
// Early exit guard
@@ -85,12 +90,12 @@
}
// Call accessor
- _accessor->access_tensor(_handle->tensor());
+ bool retval = _accessor->access_tensor(_handle->tensor());
// Unmap tensor
_handle->unmap();
- return true;
+ return retval;
}
void Tensor::bind_edge(EdgeID eid)
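
Tensor::call_accessor() forwarding the accessor's verdict (instead of always returning true) is what feeds the new GraphManager run loop. The essential shape of the change as a sketch with placeholder types: cleanup still runs on every path, but the boolean now travels upward:

    // Sketch only: capture the accessor's result rather than discarding it.
    template <typename Handle, typename Accessor>
    bool call_accessor(Handle &handle, Accessor &accessor)
    {
        handle.map();
        const bool retval = accessor.access_tensor(handle.tensor());
        handle.unmap(); // unmap runs whether or not the accessor succeeded
        return retval;  // propagate end-of-stream to the caller
    }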
diff --git a/src/graph/TypeLoader.cpp b/src/graph/TypeLoader.cpp
new file mode 100644
index 0000000..30a3546
--- /dev/null
+++ b/src/graph/TypeLoader.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/TypeLoader.h"
+
+#include "arm_compute/core/utils/misc/Utility.h"
+
+#include <map>
+
+namespace arm_compute
+{
+arm_compute::DataType data_type_from_name(const std::string &name)
+{
+ static const std::map<std::string, arm_compute::DataType> data_types =
+ {
+ { "f16", DataType::F16 },
+ { "f32", DataType::F32 },
+ { "qasymm8", DataType::QASYMM8 },
+ };
+
+ try
+ {
+ return data_types.at(arm_compute::utility::tolower(name));
+ }
+ catch(const std::out_of_range &)
+ {
+ throw std::invalid_argument(name);
+ }
+}
+
+arm_compute::DataLayout data_layout_from_name(const std::string &name)
+{
+ static const std::map<std::string, arm_compute::DataLayout> data_layouts =
+ {
+ { "nhwc", DataLayout::NHWC },
+ { "nchw", DataLayout::NCHW },
+ };
+
+ try
+ {
+ return data_layouts.at(arm_compute::utility::tolower(name));
+ }
+ catch(const std::out_of_range &)
+ {
+ throw std::invalid_argument(name);
+ }
+}
+namespace graph
+{
+Target target_from_name(const std::string &name)
+{
+ static const std::map<std::string, Target> targets =
+ {
+ { "neon", Target::NEON },
+ { "cl", Target::CL },
+ { "gles", Target::GC },
+ };
+
+ try
+ {
+ return targets.at(arm_compute::utility::tolower(name));
+ }
+ catch(const std::out_of_range &)
+ {
+ throw std::invalid_argument(name);
+ }
+}
+} // namespace graph
+} // namespace arm_compute
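
Lookups are case-insensitive (the name is lowercased before the map lookup), and an unknown name surfaces as std::invalid_argument carrying the offending string. A usage sketch:

    #include "arm_compute/graph/TypeLoader.h"

    #include <iostream>
    #include <stdexcept>

    int main()
    {
        const auto dt = arm_compute::data_type_from_name("F32");      // DataType::F32
        const auto dl = arm_compute::data_layout_from_name("nchw");   // DataLayout::NCHW
        const auto tg = arm_compute::graph::target_from_name("NEON"); // Target::NEON
        static_cast<void>(dt); static_cast<void>(dl); static_cast<void>(tg);

        try
        {
            arm_compute::data_type_from_name("int7"); // not in the map
        }
        catch(const std::invalid_argument &e)
        {
            std::cout << "unknown data type: " << e.what() << std::endl;
        }
        return 0;
    }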
diff --git a/src/graph/Utils.cpp b/src/graph/Utils.cpp
index 030fa2d..0a85a7f 100644
--- a/src/graph/Utils.cpp
+++ b/src/graph/Utils.cpp
@@ -78,22 +78,44 @@
{
PassManager pm;
+ // Passes that mutate graph IR
+ pm.append(support::cpp14::make_unique<GroupedConvolutionMutator>());
if(target != Target::GC)
{
- pm.append(support::cpp14::make_unique<InPlaceOperationMutator>());
pm.append(support::cpp14::make_unique<NodeFusionMutator>());
- pm.append(support::cpp14::make_unique<SplitLayerSubTensorMutator>());
- pm.append(support::cpp14::make_unique<DepthConcatSubTensorMutator>());
+ pm.append(support::cpp14::make_unique<InPlaceOperationMutator>());
}
+ // Passes that mutate backend information
+ if(target != Target::GC)
+ {
+ pm.append(support::cpp14::make_unique<DepthConcatSubTensorMutator>());
+ pm.append(support::cpp14::make_unique<SplitLayerSubTensorMutator>());
+ }
+ pm.append(support::cpp14::make_unique<NodeExecutionMethodMutator>());
+
return pm;
}
+void release_default_graph_context(GraphContext &ctx)
+{
+ for(const auto &backend : backends::BackendRegistry::get().backends())
+ {
+ if(backend.second->is_backend_supported())
+ {
+ backend.second->release_backend_context(ctx);
+ }
+ }
+}
+
void setup_default_graph_context(GraphContext &ctx)
{
for(const auto &backend : backends::BackendRegistry::get().backends())
{
- backend.second->setup_backend_context(ctx);
+ if(backend.second->is_backend_supported())
+ {
+ backend.second->setup_backend_context(ctx);
+ }
}
}
@@ -131,5 +153,37 @@
break;
}
}
+
+std::vector<NodeIdxPair> get_driving_nodes(const INode &node)
+{
+ std::vector<NodeIdxPair> driving_nodes;
+
+ const Graph *g = node.graph();
+ ARM_COMPUTE_ERROR_ON(g == nullptr);
+
+ for(auto &output_edge_id : node.output_edges())
+ {
+ auto output_edge = g->edge(output_edge_id);
+ if(output_edge != nullptr)
+ {
+ ARM_COMPUTE_ERROR_ON(output_edge->consumer() == nullptr);
+ driving_nodes.push_back({ output_edge->consumer_id(), output_edge->consumer_idx() });
+ }
+ }
+
+ return driving_nodes;
+}
+
+void configure_tensor(Tensor *tensor)
+{
+ if(tensor != nullptr && tensor->handle() == nullptr)
+ {
+ Target target = tensor->desc().target;
+ backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(target);
+ std::unique_ptr<ITensorHandle> handle = backend.create_tensor(*tensor);
+ ARM_COMPUTE_ERROR_ON_MSG(!handle, "Couldn't create backend handle!");
+ tensor->set_handle(std::move(handle));
+ }
+}
} // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/algorithms/TopologicalSort.cpp b/src/graph/algorithms/TopologicalSort.cpp
new file mode 100644
index 0000000..0fbf6e3
--- /dev/null
+++ b/src/graph/algorithms/TopologicalSort.cpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/algorithms/TopologicalSort.h"
+
+#include "arm_compute/graph/Graph.h"
+
+#include "arm_compute/core/utils/misc/Iterable.h"
+
+#include <list>
+#include <stack>
+
+namespace arm_compute
+{
+namespace graph
+{
+namespace detail
+{
+/** Checks if all the input dependencies of a node have been visited
+ *
+ * @param[in] node Node to check
+ * @param[in] visited Vector that contains the visited information
+ *
+ * @return True if all input dependencies have been visited else false
+ */
+inline bool all_inputs_are_visited(const INode *node, const std::vector<bool> &visited)
+{
+ ARM_COMPUTE_ERROR_ON(node == nullptr);
+ const Graph *graph = node->graph();
+ ARM_COMPUTE_ERROR_ON(graph == nullptr);
+
+ bool are_all_visited = true;
+ for(const auto &input_edge_id : node->input_edges())
+ {
+        if(input_edge_id != EmptyEdgeID)
+ {
+ const Edge *input_edge = graph->edge(input_edge_id);
+ ARM_COMPUTE_ERROR_ON(input_edge == nullptr);
+ ARM_COMPUTE_ERROR_ON(input_edge->producer() == nullptr);
+ if(!visited[input_edge->producer_id()])
+ {
+ are_all_visited = false;
+ break;
+ }
+ }
+ }
+
+ return are_all_visited;
+}
+} // namespace detail
+
+std::vector<NodeID> bfs(Graph &g)
+{
+ std::vector<NodeID> bfs_order_vector;
+
+    // Create visited vector
+ std::vector<bool> visited(g.nodes().size(), false);
+
+ // Create BFS queue
+ std::list<NodeID> queue;
+
+ // Push inputs and mark as visited
+ for(auto &input : g.nodes(NodeType::Input))
+ {
+ if(input != EmptyNodeID)
+ {
+ visited[input] = true;
+ queue.push_back(input);
+ }
+ }
+
+ // Push const nodes and mark as visited
+ for(auto &const_node : g.nodes(NodeType::Const))
+ {
+ if(const_node != EmptyNodeID)
+ {
+ visited[const_node] = true;
+ queue.push_back(const_node);
+ }
+ }
+
+ // Iterate over vector and edges
+ while(!queue.empty())
+ {
+ // Dequeue a node from queue and process
+ NodeID n = queue.front();
+ bfs_order_vector.push_back(n);
+ queue.pop_front();
+
+ const INode *node = g.node(n);
+ ARM_COMPUTE_ERROR_ON(node == nullptr);
+ for(const auto &eid : node->output_edges())
+ {
+ const Edge *e = g.edge(eid);
+ ARM_COMPUTE_ERROR_ON(e == nullptr);
+ if(!visited[e->consumer_id()] && detail::all_inputs_are_visited(e->consumer(), visited))
+ {
+ visited[e->consumer_id()] = true;
+ queue.push_back(e->consumer_id());
+ }
+ }
+ }
+
+ return bfs_order_vector;
+}
+
+std::vector<NodeID> dfs(Graph &g)
+{
+ std::vector<NodeID> dfs_order_vector;
+
+    // Create visited vector
+ std::vector<bool> visited(g.nodes().size(), false);
+
+ // Create DFS stack
+ std::stack<NodeID> stack;
+
+ // Push inputs and mark as visited
+ for(auto &input : g.nodes(NodeType::Input))
+ {
+ if(input != EmptyNodeID)
+ {
+ visited[input] = true;
+ stack.push(input);
+ }
+ }
+
+ // Push const nodes and mark as visited
+ for(auto &const_node : g.nodes(NodeType::Const))
+ {
+ if(const_node != EmptyNodeID)
+ {
+ visited[const_node] = true;
+ stack.push(const_node);
+ }
+ }
+
+ // Iterate over vector and edges
+ while(!stack.empty())
+ {
+ // Pop a node from stack and process
+ NodeID n = stack.top();
+ dfs_order_vector.push_back(n);
+ stack.pop();
+
+
+ const INode *node = g.node(n);
+ ARM_COMPUTE_ERROR_ON(node == nullptr);
+ // Reverse iterate to push branches from right to left and pop on the opposite order
+ for(const auto &eid : arm_compute::utils::iterable::reverse_iterate(node->output_edges()))
+ {
+ const Edge *e = g.edge(eid);
+ ARM_COMPUTE_ERROR_ON(e == nullptr);
+            if(!visited[e->consumer_id()] && detail::all_inputs_are_visited(e->consumer(), visited))
+            {
+                visited[e->consumer_id()] = true;
+                stack.push(e->consumer_id());
+            }
+ }
+ }
+
+ return dfs_order_vector;
+}
+} // namespace graph
+} // namespace arm_compute
\ No newline at end of file
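
Both traversals above only schedule a consumer once all_inputs_are_visited() holds, so the returned sequence is a valid topological order even for diamond-shaped graphs; dfs() is the order GraphManager::finalize_graph() now passes to configure_all_nodes(). A small usage sketch:

    #include "arm_compute/graph/Graph.h"
    #include "arm_compute/graph/algorithms/TopologicalSort.h"

    #include <iostream>

    // Print the order in which nodes will be configured/executed.
    void print_schedule(arm_compute::graph::Graph &g)
    {
        for(const arm_compute::graph::NodeID nid : arm_compute::graph::dfs(g))
        {
            const auto *node = g.node(nid);
            if(node != nullptr)
            {
                std::cout << nid << ": " << node->name() << std::endl;
            }
        }
    }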
diff --git a/src/graph/backends/BackendRegistry.cpp b/src/graph/backends/BackendRegistry.cpp
index 2803322..dccfefc 100644
--- a/src/graph/backends/BackendRegistry.cpp
+++ b/src/graph/backends/BackendRegistry.cpp
@@ -48,6 +48,14 @@
return _registered_backends[target].get();
}
+IDeviceBackend &BackendRegistry::get_backend(Target target)
+{
+ IDeviceBackend *backend = find_backend(target);
+ ARM_COMPUTE_ERROR_ON_MSG(!backend, "Requested backend doesn't exist!");
+ ARM_COMPUTE_ERROR_ON_MSG(!backend->is_backend_supported(), "Requested backend isn't supported");
+ return *backend;
+}
+
bool BackendRegistry::contains(Target target) const
{
auto it = _registered_backends.find(target);
diff --git a/src/graph/backends/CL/CLDeviceBackend.cpp b/src/graph/backends/CL/CLDeviceBackend.cpp
index bf17f80..1dbeae9 100644
--- a/src/graph/backends/CL/CLDeviceBackend.cpp
+++ b/src/graph/backends/CL/CLDeviceBackend.cpp
@@ -62,19 +62,16 @@
/** Register CL backend */
static detail::BackendRegistrar<CLDeviceBackend> CLDeviceBackend_registrar(Target::CL);
-/** Tuner export file */
-static const std::string tuner_data_filename = "acl_tuner.csv";
-
CLDeviceBackend::CLDeviceBackend()
- : _tuner(), _allocator(cl::Context::getDefault())
+ : _context_count(0), _tuner(), _allocator(nullptr), _tuner_file()
{
}
CLDeviceBackend::~CLDeviceBackend()
{
- if(_tuner.tune_new_kernels() && !_tuner.lws_table().empty())
+ if(_tuner.tune_new_kernels() && !_tuner.lws_table().empty() && !_tuner_file.empty())
{
- _tuner.save_to_file(tuner_data_filename);
+ _tuner.save_to_file(_tuner_file);
}
}
@@ -85,22 +82,40 @@
void CLDeviceBackend::initialize_backend()
{
- // Load tuner data if available
- if(_tuner.lws_table().empty() && file_exists(tuner_data_filename))
- {
- _tuner.load_from_file(tuner_data_filename);
- }
-
// Setup Scheduler
CLScheduler::get().default_init(&_tuner);
// Create allocator with new context
- _allocator = CLBufferAllocator();
+ _allocator = support::cpp14::make_unique<CLBufferAllocator>();
+}
+
+void CLDeviceBackend::release_backend_context(GraphContext &ctx)
+{
+ ARM_COMPUTE_UNUSED(ctx);
+ _context_count--;
+ if(_context_count == 0) // No more context using the backend: free resources
+ {
+ _allocator = nullptr;
+ }
}
void CLDeviceBackend::setup_backend_context(GraphContext &ctx)
{
+ // Force backend initialization
+ _context_count++;
+ if(_context_count == 1)
+ {
+ initialize_backend();
+ }
+
// Setup tuner
+ _tuner_file = ctx.config().tuner_file;
+ // Load tuner data if available
+ if(file_exists(_tuner_file))
+ {
+ _tuner.load_from_file(_tuner_file);
+ }
+
set_kernel_tuning(ctx.config().use_tuner);
// Setup a management backend
@@ -123,7 +138,7 @@
IAllocator *CLDeviceBackend::backend_allocator()
{
- return &_allocator;
+ return _allocator.get();
}
std::unique_ptr<ITensorHandle> CLDeviceBackend::create_tensor(const Tensor &tensor)
@@ -179,7 +194,7 @@
auto pool_mgr = std::make_shared<PoolManager>();
auto mm = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);
- mm->set_allocator(&_allocator);
+ mm->set_allocator(_allocator.get());
return mm;
}
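
The CL backend now reference-counts the graph contexts using it: the first setup_backend_context() call runs initialize_backend() and creates the heap-allocated CLBufferAllocator, and the release from the last remaining context frees it. The acquire/release pairing in miniature (hypothetical names, not the library API):

    #include <memory>

    class RefCountedResource
    {
    public:
        void acquire()
        {
            if(++_count == 1)
            {
                _resource = std::make_unique<int>(0); // expensive init on first user
            }
        }

        void release()
        {
            if(--_count == 0)
            {
                _resource = nullptr; // last user gone: free the resource
            }
        }

    private:
        int                  _count{ 0 };
        std::unique_ptr<int> _resource{};
    };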
diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp
index db8a7a0..bf3dcba 100644
--- a/src/graph/backends/CL/CLFunctionsFactory.cpp
+++ b/src/graph/backends/CL/CLFunctionsFactory.cpp
@@ -25,16 +25,9 @@
#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/graph/Graph.h"
-#include "arm_compute/graph/GraphContext.h"
-#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/TypePrinter.h"
-#include "arm_compute/graph/Types.h"
-#include "arm_compute/graph/backends/Utils.h"
-#include "arm_compute/graph/nodes/Nodes.h"
+#include "arm_compute/graph/backends/FunctionHelpers.h"
#include "arm_compute/runtime/CL/CLFunctions.h"
-#include "support/ToolchainSupport.h"
-
using namespace arm_compute::utils::cast;
namespace arm_compute
@@ -43,526 +36,38 @@
{
namespace backends
{
-namespace
+/** Target specific information structure used to pass information to the layer templates */
+struct CLTargetInfo
{
-/** Returns backing tensor of a given tensor
- *
- * @param[in] tensor Tensor to extract the backing tensor from
- *
- * @return Backing tensor if present else nullptr
- */
-arm_compute::ICLTensor *get_backing_tensor(arm_compute::graph::Tensor *tensor)
+ using TensorType = arm_compute::ICLTensor;
+ static Target TargetType;
+};
+
+Target CLTargetInfo::TargetType = Target::CL;
+
+/** Collection of CL convolution functions */
+struct CLConvolutionLayerFunctions
{
- arm_compute::ICLTensor *backing_tensor = nullptr;
- if(tensor != nullptr)
- {
- ARM_COMPUTE_ERROR_ON(tensor->desc().target != arm_compute::graph::Target::CL);
- // Get backing tensor handle
- ITensorHandle *tensor_handle = tensor->handle();
- // Get backing tensor
- backing_tensor = (tensor_handle != nullptr) ? polymorphic_cast<ICLTensor *>(&tensor_handle->tensor()) : nullptr;
- }
+ using GenericConvolutionLayer = CLConvolutionLayer;
+ using GEMMConvolutionLayer = CLGEMMConvolutionLayer;
+ using DirectConvolutionLayer = CLDirectConvolutionLayer;
+ using WinogradConvolutionLayer = CLWinogradConvolutionLayer;
+};
- return backing_tensor;
-}
-
-/** Create a backend activation layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend activation layer function
- */
-std::unique_ptr<IFunction> create_activation_layer(ActivationLayerNode &node)
+/** Collection of CL depthwise convolution functions */
+struct CLDepthwiseConvolutionLayerFunctions
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE(
- "Creating CL ActivationLayerNode node with ID : " << node.id() << " and Name: " << node.name()
- << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
+ using GenericDepthwiseConvolutionLayer = CLDepthwiseConvolutionLayer;
+ using DepthwiseConvolutionLayer3x3 = CLDepthwiseConvolutionLayer3x3;
+};
- // Extract IO and info
- ICLTensor *input = get_backing_tensor(node.input(0));
- ICLTensor *output = get_backing_tensor(node.output(0));
- const ActivationLayerInfo act_info = node.activation_info();
-
- // Create function
- auto func = support::cpp14::make_unique<CLActivationLayer>();
- func->configure(input, output, act_info);
-
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated CLActivationLayer"
- << " Data Type: " << input->info()->data_type()
- << " Shape: " << input->info()->tensor_shape()
- << " Activation function: " << act_info.activation()
- << " a: " << act_info.a()
- << " b: " << act_info.b()
- << " InPlace : " << is_in_place_operation(input, output)
- << std::endl);
-
- return std::move(func);
-}
-
-/** Create a backend batch normalization layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend batch normalization layer function
- */
-std::unique_ptr<IFunction> create_batch_normalization_layer(BatchNormalizationLayerNode &node)
+/** Collection of CL element-wise functions */
+struct CLEltwiseFunctions
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating CL BatchNormalization node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 5);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- ICLTensor *input = get_backing_tensor(node.input(0));
- ICLTensor *mean = get_backing_tensor(node.input(1));
- ICLTensor *var = get_backing_tensor(node.input(2));
- ICLTensor *beta = get_backing_tensor(node.input(3));
- ICLTensor *gamma = get_backing_tensor(node.input(4));
- ICLTensor *output = get_backing_tensor(node.output(0));
- const float epsilon = node.epsilon();
- const ActivationLayerInfo fused_act = node.fused_activation();
-
- // Create and configure function
- auto func = support::cpp14::make_unique<CLBatchNormalizationLayer>();
- func->configure(input, output, mean, var, beta, gamma, epsilon, fused_act);
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated CLBatchNormalizationLayer"
- << " Data Type: " << input->info()->data_type()
- << " Shape: " << input->info()->tensor_shape()
- << " Epsilon: " << epsilon << " "
- << (fused_act.enabled() ? to_string(fused_act.activation()) : "")
- << " InPlace : " << is_in_place_operation(input, output)
- << std::endl);
-
- return std::move(func);
-}
-
-/** Create a backend convolution layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend convolution layer function
- */
-std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node, GraphContext &ctx)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating CL ConvolutionLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 3);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- ICLTensor *input = get_backing_tensor(node.input(0));
- ICLTensor *weights = get_backing_tensor(node.input(1));
- ICLTensor *biases = get_backing_tensor(node.input(2));
- ICLTensor *output = get_backing_tensor(node.output(0));
-
- if(is_data_type_quantized_asymmetric(input->info()->data_type()))
- {
- biases->info()->set_data_type(DataType::S32);
- }
-
- const PadStrideInfo conv_info = node.convolution_info();
- const ConvolutionMethod conv_algorithm = node.convolution_method();
- const bool fast_math = node.fast_math_hint() == FastMathHint::ENABLED;
-
- // Create and configure function (we assume that functions have been validated before creation)
- std::shared_ptr<IMemoryManager> mm = get_memory_manager(ctx, Target::CL);
- std::unique_ptr<IFunction> func;
- std::string func_name;
-
- if(conv_algorithm == ConvolutionMethod::WINOGRAD)
- {
- std::tie(func, func_name) = create_named_memory_managed_function<CLWinogradConvolutionLayer>(
- std::string("CLWinogradConvolutionLayer"), mm, input, weights, biases, output, conv_info, ActivationLayerInfo(), fast_math);
- }
- else if(conv_algorithm == ConvolutionMethod::DIRECT)
- {
- std::tie(func, func_name) = create_named_function<CLDirectConvolutionLayer>(
- std::string("CLDirectConvolutionLayer"), input, weights, biases, output, conv_info);
- }
- else if(conv_algorithm == ConvolutionMethod::GEMM)
- {
- std::tie(func, func_name) = create_named_memory_managed_function<CLGEMMConvolutionLayer>(std::string("CLGEMMConvolutionLayer"), mm,
- input, weights, biases, output, conv_info);
- }
- else
- {
- std::tie(func, func_name) = create_named_memory_managed_function<CLConvolutionLayer>(std::string("CLConvolutionLayer"), mm,
- input, weights, biases, output, conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), fast_math);
- }
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
- << " Data Type: " << input->info()->data_type()
- << " Input QuantInfo: " << input->info()->quantization_info()
- << " Weights QuantInfo: " << weights->info()->quantization_info()
- << " Input shape: " << input->info()->tensor_shape()
- << " Weights shape: " << weights->info()->tensor_shape()
- << " Output shape: " << output->info()->tensor_shape()
- << std::endl);
- return func;
-}
-
-/** Create a backend layer depth concatenate function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend depth concatenate layer function
- */
-std::unique_ptr<arm_compute::IFunction> create_depth_concatenate_layer(DepthConcatenateLayerNode &node)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating CL DepthConcatenate node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Return nullptr if depth concatenate is switched off
- if(!node.is_enabled())
- {
- return nullptr;
- }
-
- // Extract IO and info
- std::vector<arm_compute::ICLTensor *> inputs;
- for(unsigned int i = 0; i < node.num_inputs(); ++i)
- {
- inputs.push_back(get_backing_tensor(node.input(i)));
- }
- ICLTensor *output = get_backing_tensor(node.output(0));
-
- // Create and configure function
- auto func = support::cpp14::make_unique<CLDepthConcatenateLayer>();
- func->configure(inputs, output);
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated CLDepthConcatenateLayer"
- << " Data Type: " << output->info()->data_type()
- << " Shape: " << output->info()->tensor_shape()
- << " Num Inputs: " << inputs.size()
- << std::endl);
-
- return std::move(func);
-}
-
-/** Create a backend layer depth-wise convolution function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend depth-wise convolution layer function
- */
-std::unique_ptr<IFunction> create_depthwise_convolution_layer(DepthwiseConvolutionLayerNode &node)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE(
- "Creating CL DepthwiseConvolutionLayer node with ID : " << node.id() << " and Name: " << node.name()
- << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 3);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- ICLTensor *input = get_backing_tensor(node.input(0));
- ICLTensor *weights = get_backing_tensor(node.input(1));
- ICLTensor *biases = get_backing_tensor(node.input(2));
- ICLTensor *output = get_backing_tensor(node.output(0));
-
- if(is_data_type_quantized_asymmetric(input->info()->data_type()))
- {
- biases->info()->set_data_type(DataType::S32);
- }
-
- const PadStrideInfo conv_info = node.convolution_info();
- const DepthwiseConvolutionMethod dwc_algorithm = node.depthwise_convolution_method();
-
- // Create and configure function (we assume that functions have been validated before creation)
- std::unique_ptr<IFunction> func;
- std::string func_name;
- if(dwc_algorithm == DepthwiseConvolutionMethod::OPTIMIZED_3x3)
- {
- std::tie(func, func_name) = create_named_function<CLDepthwiseConvolutionLayer3x3>(
- std::string("CLDepthwiseConvolutionLayer3x3"), input, weights, biases, output, conv_info);
- }
- else
- {
- std::tie(func, func_name) = create_named_function<CLDepthwiseConvolutionLayer>(
- std::string("CLDepthwiseConvolutionLayer"), input, weights, biases, output, conv_info);
- }
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
- << " Data Type: " << input->info()->data_type()
- << " Input QuantInfo: " << input->info()->quantization_info()
- << " Weights QuantInfo: " << weights->info()->quantization_info()
- << " Input shape: " << input->info()->tensor_shape()
- << " Weights shape: " << weights->info()->tensor_shape()
- << " Output shape: " << output->info()->tensor_shape()
- << std::endl);
- return func;
-}
-
-/** Create a backend element-wise operation layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend element-wise operation layer function
- */
-std::unique_ptr<IFunction> create_eltwise_layer(EltwiseLayerNode &node)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE(
- "Creating CL EltwiseLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 2);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- ICLTensor *input1 = get_backing_tensor(node.input(0));
- ICLTensor *input2 = get_backing_tensor(node.input(1));
- ICLTensor *output = get_backing_tensor(node.output(0));
- const EltwiseOperation eltwise_op = node.eltwise_operation();
- const ConvertPolicy convert_policy = node.convert_policy();
- ARM_COMPUTE_ERROR_ON(input1 == nullptr);
- ARM_COMPUTE_ERROR_ON(input2 == nullptr);
- ARM_COMPUTE_ERROR_ON(output == nullptr);
-
- std::unique_ptr<IFunction> func = nullptr;
- std::string func_name;
- if(eltwise_op == EltwiseOperation::ADD)
- {
- std::tie(func, func_name) = create_named_function<CLArithmeticAddition>(std::string("CLArithmeticAddition"),
- input1, input2, output,
- convert_policy);
- }
- else if(eltwise_op == EltwiseOperation::SUB)
- {
- std::tie(func, func_name) = create_named_function<CLArithmeticSubtraction>(
- std::string("CLArithmeticSubtraction"), input1, input2, output, convert_policy);
- }
- else if(eltwise_op == EltwiseOperation::MUL)
- {
- std::tie(func, func_name) = create_named_function<CLPixelWiseMultiplication>(
- std::string("CLPixelWiseMultiplication"), input1, input2, output, 1.f, convert_policy,
- node.rounding_policy());
- }
- else
- {
- ARM_COMPUTE_ERROR("Unsupported element-wise operation!");
- }
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
- << " Data Type: " << input1->info()->data_type()
- << " Shape : " << input1->info()->tensor_shape()
- << std::endl);
-
- return func;
-}
-
-/** Create a backend flatten layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend flatten layer function
- */
-std::unique_ptr<IFunction> create_flatten_layer(FlattenLayerNode &node)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE(
- "Creating CL FlattenLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- ICLTensor *input = get_backing_tensor(node.input(0));
- ICLTensor *output = get_backing_tensor(node.output(0));
-
- // Create and configure function
- auto func = support::cpp14::make_unique<CLFlattenLayer>();
- func->configure(input, output);
- ARM_COMPUTE_ERROR_ON(input == nullptr);
- ARM_COMPUTE_ERROR_ON(output == nullptr);
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated CLFlattenLayer"
- << " Data Type: " << input->info()->data_type()
- << " Input shape: " << input->info()->tensor_shape()
- << " Output shape: " << output->info()->tensor_shape()
- << std::endl);
-
- return std::move(func);
-}
-
-/** Create a backend fully connected layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend fully connected layer function
- */
-std::unique_ptr<IFunction> create_fully_connected_layer(FullyConnectedLayerNode &node, GraphContext &ctx)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE(
- "Creating CL FullyConnectedLayer node with ID : " << node.id() << " and Name: " << node.name()
- << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 3);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- ICLTensor *input = get_backing_tensor(node.input(0));
- ICLTensor *weights = get_backing_tensor(node.input(1));
- ICLTensor *biases = get_backing_tensor(node.input(2));
- ICLTensor *output = get_backing_tensor(node.output(0));
-
- // Create and configure function
- auto func = support::cpp14::make_unique<CLFullyConnectedLayer>(get_memory_manager(ctx, Target::CL));
- func->configure(input, weights, biases, output);
- ARM_COMPUTE_ERROR_ON(input == nullptr);
- ARM_COMPUTE_ERROR_ON(weights == nullptr);
- ARM_COMPUTE_ERROR_ON(output == nullptr);
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated CLFullyConnectedLayer"
- << " Data Type: " << input->info()->data_type()
- << " Input shape: " << input->info()->tensor_shape()
- << " Weights shape: " << weights->info()->tensor_shape()
- << " Biases Shape: " << biases->info()->tensor_shape()
- << " Output shape: " << output->info()->tensor_shape()
- << std::endl);
-
- return std::move(func);
-}
-
-/** Create a backend normalization layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend normalization layer function
- */
-std::unique_ptr<IFunction> create_normalization_layer(NormalizationLayerNode &node)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE(
- "Creating CL NormalizationLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- ICLTensor *input = get_backing_tensor(node.input(0));
- ICLTensor *output = get_backing_tensor(node.output(0));
- const NormalizationLayerInfo norm_info = node.normalization_info();
- ARM_COMPUTE_ERROR_ON(input == nullptr);
- ARM_COMPUTE_ERROR_ON(output == nullptr);
-
- // Create and configure function
- auto func = support::cpp14::make_unique<CLNormalizationLayer>();
- func->configure(input, output, norm_info);
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated CLNormalizationLayer"
- << " Data Type: " << input->info()->data_type()
- << " Input shape: " << input->info()->tensor_shape()
- << " Output shape: " << output->info()->tensor_shape()
- << " Normalization info: " << norm_info.type()
- << std::endl);
-
- return std::move(func);
-}
-
-/** Create a backend pooling layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend pooling layer function
- */
-std::unique_ptr<IFunction> create_pooling_layer(PoolingLayerNode &node)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE(
- "Creating CL PoolingLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- ICLTensor *input = get_backing_tensor(node.input(0));
- ICLTensor *output = get_backing_tensor(node.output(0));
- const PoolingLayerInfo pool_info = node.pooling_info();
- ARM_COMPUTE_ERROR_ON(input == nullptr);
- ARM_COMPUTE_ERROR_ON(output == nullptr);
-
- // Create and configure function
- auto func = support::cpp14::make_unique<CLPoolingLayer>();
- func->configure(input, output, pool_info);
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated CLPoolingLayer"
- << " Data Type: " << input->info()->data_type()
- << " Input shape: " << input->info()->tensor_shape()
- << " Output shape: " << output->info()->tensor_shape()
- << " Pooling info: " << pool_info.pool_type()
- << std::endl);
-
- return std::move(func);
-}
-
-/** Create a backend reshape layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend reshape layer function
- */
-std::unique_ptr<IFunction> create_reshape_layer(ReshapeLayerNode &node)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE(
- "Creating CL ReshapeLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- ICLTensor *input = get_backing_tensor(node.input(0));
- ICLTensor *output = get_backing_tensor(node.output(0));
- ARM_COMPUTE_ERROR_ON(input == nullptr);
- ARM_COMPUTE_ERROR_ON(output == nullptr);
-
- // Create and configure function
- auto func = support::cpp14::make_unique<CLReshapeLayer>();
- func->configure(input, output);
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated CLReshapeLayer"
- << " Data Type: " << input->info()->data_type()
- << " Input shape: " << input->info()->tensor_shape()
- << " Output shape: " << output->info()->tensor_shape()
- << std::endl);
-
- return std::move(func);
-}
-
-/** Create a backend softmax layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend softmax layer function
- */
-std::unique_ptr<IFunction> create_softmax_layer(SoftmaxLayerNode &node, GraphContext &ctx)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE(
- "Creating CL SoftmaxLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- ICLTensor *input = get_backing_tensor(node.input(0));
- ICLTensor *output = get_backing_tensor(node.output(0));
- const float beta = node.beta();
- ARM_COMPUTE_ERROR_ON(input == nullptr);
- ARM_COMPUTE_ERROR_ON(output == nullptr);
-
- // Create and configure function
- auto func = support::cpp14::make_unique<CLSoftmaxLayer>(get_memory_manager(ctx, Target::CL));
- func->configure(input, output, beta);
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated CLSoftmaxLayer"
- << " Data Type: " << input->info()->data_type()
- << " Input shape: " << input->info()->tensor_shape()
- << " Output shape: " << output->info()->tensor_shape()
- << std::endl);
-
- return std::move(func);
-}
-} // namespace
+ using Addition = CLArithmeticAddition;
+ using Subtraction = CLArithmeticSubtraction;
+ using Multiplication = CLPixelWiseMultiplication;
+};
std::unique_ptr<IFunction> CLFunctionFactory::create(INode *node, GraphContext &ctx)
{
@@ -575,33 +80,41 @@
switch(type)
{
case NodeType::ActivationLayer:
- return create_activation_layer(*polymorphic_downcast<ActivationLayerNode *>(node));
+ return detail::create_activation_layer<CLActivationLayer, CLTargetInfo>(*polymorphic_downcast<ActivationLayerNode *>(node));
case NodeType::BatchNormalizationLayer:
- return create_batch_normalization_layer(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
+ return detail::create_batch_normalization_layer<CLBatchNormalizationLayer, CLTargetInfo>(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
+ case NodeType::ChannelShuffleLayer:
+ return detail::create_channel_shuffle_layer<CLChannelShuffleLayer, CLTargetInfo>(*polymorphic_downcast<ChannelShuffleLayerNode *>(node));
case NodeType::ConvolutionLayer:
- return create_convolution_layer(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
- case NodeType::DepthConcatenateLayer:
- return create_depth_concatenate_layer(*polymorphic_downcast<DepthConcatenateLayerNode *>(node));
+ return detail::create_convolution_layer<CLConvolutionLayerFunctions, CLTargetInfo>(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
+ case NodeType::DeconvolutionLayer:
+ return detail::create_deconvolution_layer<CLDeconvolutionLayer, CLTargetInfo>(*polymorphic_downcast<DeconvolutionLayerNode *>(node), ctx);
+ case NodeType::ConcatenateLayer:
+ return detail::create_concatenate_layer<CLConcatenateLayer, CLTargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
case NodeType::DepthwiseConvolutionLayer:
- return create_depthwise_convolution_layer(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+ return detail::create_depthwise_convolution_layer<CLDepthwiseConvolutionLayerFunctions, CLTargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
case NodeType::EltwiseLayer:
- return create_eltwise_layer(*polymorphic_downcast<EltwiseLayerNode *>(node));
+ return detail::create_eltwise_layer<CLEltwiseFunctions, CLTargetInfo>(*polymorphic_downcast<EltwiseLayerNode *>(node));
case NodeType::FlattenLayer:
- return create_flatten_layer(*polymorphic_downcast<FlattenLayerNode *>(node));
+ return detail::create_flatten_layer<CLFlattenLayer, CLTargetInfo>(*polymorphic_downcast<FlattenLayerNode *>(node));
case NodeType::FullyConnectedLayer:
- return create_fully_connected_layer(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
+ return detail::create_fully_connected_layer<CLFullyConnectedLayer, CLTargetInfo>(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
case NodeType::NormalizationLayer:
- return create_normalization_layer(*polymorphic_downcast<NormalizationLayerNode *>(node));
+ return detail::create_normalization_layer<CLNormalizationLayer, CLTargetInfo>(*polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
+ case NodeType::PermuteLayer:
+ return detail::create_permute_layer<CLPermute, CLTargetInfo>(*polymorphic_downcast<PermuteLayerNode *>(node));
case NodeType::PoolingLayer:
- return create_pooling_layer(*polymorphic_downcast<PoolingLayerNode *>(node));
+ return detail::create_pooling_layer<CLPoolingLayer, CLTargetInfo>(*polymorphic_downcast<PoolingLayerNode *>(node));
case NodeType::ReshapeLayer:
- return create_reshape_layer(*polymorphic_downcast<ReshapeLayerNode *>(node));
+ return detail::create_reshape_layer<CLReshapeLayer, CLTargetInfo>(*polymorphic_downcast<ReshapeLayerNode *>(node));
+ case NodeType::ResizeLayer:
+ return detail::create_resize_layer<CLScale, CLTargetInfo>(*polymorphic_downcast<ResizeLayerNode *>(node));
case NodeType::SoftmaxLayer:
- return create_softmax_layer(*polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
+ return detail::create_softmax_layer<CLSoftmaxLayer, CLTargetInfo>(*polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
default:
return nullptr;
}
}
} // namespace backends
} // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
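
The per-layer creators deleted above moved into shared templates in FunctionHelpers.h, parametrised on the layer function and a TargetInfo trait so the CL, NEON and GLES factories share one implementation; CLTargetInfo supplies the backing tensor type (ICLTensor) and the Target value. A simplified sketch of the pattern (condensed, not the header's exact code):

    #include "arm_compute/graph/backends/FunctionHelpers.h"

    // Sketch: one creator template serves every backend via the TargetInfo trait.
    template <typename ActivationLayerFunction, typename TargetInfo>
    std::unique_ptr<arm_compute::IFunction> create_activation(arm_compute::graph::ActivationLayerNode &node)
    {
        // Backing tensor type (ICLTensor, ITensor, IGCTensor, ...) is resolved via the trait.
        typename TargetInfo::TensorType *input  = arm_compute::graph::backends::detail::get_backing_tensor<TargetInfo>(node.input(0));
        typename TargetInfo::TensorType *output = arm_compute::graph::backends::detail::get_backing_tensor<TargetInfo>(node.output(0));

        auto func = arm_compute::support::cpp14::make_unique<ActivationLayerFunction>();
        func->configure(input, output, node.activation_info());
        return std::move(func);
    }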
diff --git a/src/graph/backends/CL/CLNodeValidator.cpp b/src/graph/backends/CL/CLNodeValidator.cpp
index c16b2e6..ba5b59d 100644
--- a/src/graph/backends/CL/CLNodeValidator.cpp
+++ b/src/graph/backends/CL/CLNodeValidator.cpp
@@ -47,6 +47,8 @@
NodeType type = node->type();
switch(type)
{
+ case NodeType::ChannelShuffleLayer:
+ return detail::validate_channel_shuffle_layer<CLChannelShuffleLayer>(*polymorphic_downcast<ChannelShuffleLayerNode *>(node));
case NodeType::ConvolutionLayer:
return detail::validate_convolution_layer<CLConvolutionLayer,
CLDirectConvolutionLayer,
@@ -55,6 +57,8 @@
case NodeType::DepthwiseConvolutionLayer:
return detail::validate_depthwise_convolution_layer<CLDepthwiseConvolutionLayer,
CLDepthwiseConvolutionLayer3x3>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+ case NodeType::PermuteLayer:
+ return detail::validate_permute_layer<CLPermute>(*polymorphic_downcast<PermuteLayerNode *>(node));
default:
return Status{};
}
diff --git a/src/graph/backends/GLES/GCDeviceBackend.cpp b/src/graph/backends/GLES/GCDeviceBackend.cpp
index 770cca5..ec3cf4f 100644
--- a/src/graph/backends/GLES/GCDeviceBackend.cpp
+++ b/src/graph/backends/GLES/GCDeviceBackend.cpp
@@ -53,7 +53,7 @@
static detail::BackendRegistrar<GCDeviceBackend> GCDeviceBackend_registrar(Target::GC);
GCDeviceBackend::GCDeviceBackend()
- : _allocator()
+ : _initialized(false), _allocator()
{
}
@@ -63,8 +63,21 @@
GCScheduler::get().default_init();
}
+void GCDeviceBackend::release_backend_context(GraphContext &ctx)
+{
+ //Nothing to do
+ ARM_COMPUTE_UNUSED(ctx);
+}
+
void GCDeviceBackend::setup_backend_context(GraphContext &ctx)
{
+ // Force backend initialization
+ if(!_initialized)
+ {
+ initialize_backend();
+ _initialized = true;
+ }
+
// Setup a management backend
if(ctx.memory_management_ctx(Target::GC) == nullptr)
{
@@ -144,4 +157,4 @@
}
} // namespace backends
} // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
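
GCDeviceBackend now initialises lazily on the first setup_backend_context() call instead of eagerly at start-up; the class declaration presumably gains the bool _initialized member seen in the constructor. The guard in isolation:

    #include <iostream>

    class DeviceBackend
    {
    public:
        DeviceBackend() : _initialized(false) {}

        void setup_backend_context()
        {
            // Force backend initialization on first use only.
            if(!_initialized)
            {
                initialize_backend();
                _initialized = true;
            }
            // ... set up memory managers etc. ...
        }

    private:
        void initialize_backend()
        {
            std::cout << "expensive one-time runtime init\n";
        }

        bool _initialized;
    };

    int main()
    {
        DeviceBackend b;
        b.setup_backend_context(); // initializes
        b.setup_backend_context(); // no-op
    }

As written the flag is not thread-safe; if contexts could be set up concurrently, std::call_once would be the safer choice.
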
diff --git a/src/graph/backends/GLES/GCFunctionsFactory.cpp b/src/graph/backends/GLES/GCFunctionsFactory.cpp
index e61e840..f72513c 100644
--- a/src/graph/backends/GLES/GCFunctionsFactory.cpp
+++ b/src/graph/backends/GLES/GCFunctionsFactory.cpp
@@ -25,16 +25,9 @@
#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/graph/Graph.h"
-#include "arm_compute/graph/GraphContext.h"
-#include "arm_compute/graph/Logger.h"
-#include "arm_compute/graph/TypePrinter.h"
-#include "arm_compute/graph/Types.h"
-#include "arm_compute/graph/backends/Utils.h"
-#include "arm_compute/graph/nodes/Nodes.h"
+#include "arm_compute/graph/backends/FunctionHelpers.h"
#include "arm_compute/runtime/GLES_COMPUTE/GCFunctions.h"
-#include "support/ToolchainSupport.h"
-
using namespace arm_compute::utils::cast;
namespace arm_compute
@@ -43,120 +36,84 @@
{
namespace backends
{
-namespace
+/** Target specific information structure used to pass information to the layer templates */
+struct GCTargetInfo
{
-/** Returns backing tensor of a given tensor
- *
- * @param[in] tensor Tensor to extract the backing tensor from
- *
- * @return Backing tensor if present else nullptr
- */
-arm_compute::IGCTensor *get_backing_tensor(arm_compute::graph::Tensor *tensor)
+ using TensorType = arm_compute::IGCTensor;
+ static Target TargetType;
+};
+
+Target GCTargetInfo::TargetType = Target::GC;
+
+/** Collection of GC convolution functions */
+struct GCConvolutionLayerFunctions
{
- arm_compute::IGCTensor *backing_tensor = nullptr;
- if(tensor != nullptr)
+ using GenericConvolutionLayer = GCConvolutionLayer;
+ using GEMMConvolutionLayer = GCConvolutionLayer;
+ using DirectConvolutionLayer = GCDirectConvolutionLayer;
+};
+
+/** Collection of GC depthwise convolution functions */
+struct GCDepthwiseConvolutionLayerFunctions
+{
+ using DepthwiseConvolutionLayer3x3 = GCDepthwiseConvolutionLayer3x3;
+};
+
+/** Collection of GC element-wise functions */
+struct GCEltwiseFunctions
+{
+ using Addition = GCArithmeticAddition;
+ using Multiplication = GCPixelWiseMultiplication;
+};
+
+namespace detail
+{
+// Specialize functions
+template <>
+std::unique_ptr<IFunction> create_concatenate_layer<GCDepthConcatenateLayer, GCTargetInfo>(ConcatenateLayerNode &node)
+{
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating Concatenate node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
+ ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
+
+ // Return nullptr if depth concatenate is switched off
+ if(!node.is_enabled())
{
- ARM_COMPUTE_ERROR_ON(tensor->desc().target != arm_compute::graph::Target::GC);
- // Get backing tensor handle
- ITensorHandle *tensor_handle = tensor->handle();
- // Get backing tensor
- backing_tensor = (tensor_handle != nullptr) ? polymorphic_cast<IGCTensor *>(&tensor_handle->tensor()) : nullptr;
+ return nullptr;
}
- return backing_tensor;
-}
-
-/** Create a backend activation layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend activation layer function
- */
-std::unique_ptr<IFunction> create_activation_layer(ActivationLayerNode &node)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE(
- "Creating GC ActivationLayerNode node with ID : " << node.id() << " and Name: " << node.name()
- << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
// Extract IO and info
- IGCTensor *input = get_backing_tensor(node.input(0));
- IGCTensor *output = get_backing_tensor(node.output(0));
- const ActivationLayerInfo act_info = node.activation_info();
-
- // Create function
- auto func = support::cpp14::make_unique<GCActivationLayer>();
- func->configure(input, output, act_info);
-
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated GCActivationLayer"
- << " Data Type: " << input->info()->data_type()
- << " Shape: " << input->info()->tensor_shape()
- << " Activation function: " << act_info.activation()
- << " a: " << act_info.a()
- << " b: " << act_info.b()
- << " InPlace : " << is_in_place_operation(input, output)
- << std::endl);
-
- return std::move(func);
-}
-
-/** Create a backend batch normalization layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend batch normalization layer function
- */
-std::unique_ptr<IFunction> create_batch_normalization_layer(BatchNormalizationLayerNode &node)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating GC BatchNormalization node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 5);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- IGCTensor *input = get_backing_tensor(node.input(0));
- IGCTensor *mean = get_backing_tensor(node.input(1));
- IGCTensor *var = get_backing_tensor(node.input(2));
- IGCTensor *beta = get_backing_tensor(node.input(3));
- IGCTensor *gamma = get_backing_tensor(node.input(4));
- IGCTensor *output = get_backing_tensor(node.output(0));
- const float epsilon = node.epsilon();
- const ActivationLayerInfo fused_act = node.fused_activation();
+ std::vector<GCTargetInfo::TensorType *> inputs;
+ for(unsigned int i = 0; i < node.num_inputs(); ++i)
+ {
+ inputs.push_back(get_backing_tensor<GCTargetInfo>(node.input(i)));
+ }
+ typename GCTargetInfo::TensorType *output = get_backing_tensor<GCTargetInfo>(node.output(0));
// Create and configure function
- auto func = support::cpp14::make_unique<GCBatchNormalizationLayer>();
- func->configure(input, output, mean, var, beta, gamma, epsilon, fused_act);
+ auto func = support::cpp14::make_unique<GCDepthConcatenateLayer>();
+ func->configure(inputs, output);
// Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated GCBatchNormalizationLayer"
- << " Data Type: " << input->info()->data_type()
- << " Shape: " << input->info()->tensor_shape()
- << " Epsilon: " << epsilon << " "
- << (fused_act.enabled() ? to_string(fused_act.activation()) : "")
- << " InPlace : " << is_in_place_operation(input, output)
+ ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << node.type()
+ << " Target " << GCTargetInfo::TargetType
+ << " Data Type: " << output->info()->data_type()
+ << " Shape: " << output->info()->tensor_shape()
+ << " Num Inputs: " << inputs.size()
<< std::endl);
return std::move(func);
}
-/** Create a backend convolution layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend convolution layer function
- */
-std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node, GraphContext &ctx)
+template <>
+std::unique_ptr<IFunction> create_convolution_layer<GCConvolutionLayerFunctions, GCTargetInfo>(ConvolutionLayerNode &node, GraphContext &ctx)
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating GC ConvolutionLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 3);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
+ validate_node<GCTargetInfo>(node, 3 /* expected inputs */, 1 /* expected outputs */);
// Extract IO and info
- IGCTensor *input = get_backing_tensor(node.input(0));
- IGCTensor *weights = get_backing_tensor(node.input(1));
- IGCTensor *biases = get_backing_tensor(node.input(2));
- IGCTensor *output = get_backing_tensor(node.output(0));
+ GCTargetInfo::TensorType *input = get_backing_tensor<GCTargetInfo>(node.input(0));
+ GCTargetInfo::TensorType *weights = get_backing_tensor<GCTargetInfo>(node.input(1));
+ GCTargetInfo::TensorType *biases = get_backing_tensor<GCTargetInfo>(node.input(2));
+ GCTargetInfo::TensorType *output = get_backing_tensor<GCTargetInfo>(node.output(0));
if(is_data_type_quantized_asymmetric(input->info()->data_type()))
{
@@ -167,19 +124,21 @@
const ConvolutionMethod conv_algorithm = node.convolution_method();
// Create and configure function (we assume that functions have been validated before creation)
- std::shared_ptr<IMemoryManager> mm = get_memory_manager(ctx, Target::GC);
+ std::shared_ptr<IMemoryManager> mm = get_memory_manager(ctx, GCTargetInfo::TargetType);
std::unique_ptr<IFunction> func;
std::string func_name;
- if(conv_algorithm == ConvolutionMethod::DIRECT)
+ if(conv_algorithm == ConvolutionMethod::Direct)
{
- std::tie(func, func_name) = create_named_function<GCDirectConvolutionLayer>(
- std::string("GCDirectConvolutionLayer"), input, weights, biases, output, conv_info);
+ std::tie(func, func_name) = create_named_function<GCConvolutionLayerFunctions::DirectConvolutionLayer>(
+ std::string("DirectConvolutionLayer"),
+ input, weights, biases, output, conv_info);
}
else
{
- std::tie(func, func_name) = create_named_memory_managed_function<GCConvolutionLayer>(std::string("GCConvolutionLayer"), mm,
- input, weights, biases, output, conv_info);
+ std::tie(func, func_name) = create_named_memory_managed_function<GCConvolutionLayerFunctions::GenericConvolutionLayer>(
+ std::string("ConvolutionLayer"), mm,
+ input, weights, biases, output, conv_info);
}
// Log info
@@ -194,64 +153,16 @@
return func;
}
-/** Create a backend layer depth concatenate function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend depth concatenate layer function
- */
-std::unique_ptr<arm_compute::IFunction> create_depth_concatenate_layer(DepthConcatenateLayerNode &node)
+template <>
+std::unique_ptr<IFunction> create_depthwise_convolution_layer<GCDepthwiseConvolutionLayerFunctions, GCTargetInfo>(DepthwiseConvolutionLayerNode &node)
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating GC DepthConcatenate node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Return nullptr if depth concatenate is switched off
- if(!node.is_enabled())
- {
- return nullptr;
- }
+ validate_node<GCTargetInfo>(node, 3 /* expected inputs */, 1 /* expected outputs */);
// Extract IO and info
- std::vector<arm_compute::IGCTensor *> inputs;
- for(unsigned int i = 0; i < node.num_inputs(); ++i)
- {
- inputs.push_back(get_backing_tensor(node.input(i)));
- }
- IGCTensor *output = get_backing_tensor(node.output(0));
-
- // Create and configure function
- auto func = support::cpp14::make_unique<GCDepthConcatenateLayer>();
- func->configure(inputs, output);
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated GCDepthConcatenateLayer"
- << " Data Type: " << output->info()->data_type()
- << " Shape: " << output->info()->tensor_shape()
- << " Num Inputs: " << inputs.size()
- << std::endl);
-
- return std::move(func);
-}
-
-/** Create a backend layer depth-wise convolution function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend depth-wise convolution layer function
- */
-std::unique_ptr<IFunction> create_depthwise_convolution_layer(DepthwiseConvolutionLayerNode &node)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE(
- "Creating GC DepthwiseConvolutionLayer node with ID : " << node.id() << " and Name: " << node.name()
- << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 3);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- IGCTensor *input = get_backing_tensor(node.input(0));
- IGCTensor *weights = get_backing_tensor(node.input(1));
- IGCTensor *biases = get_backing_tensor(node.input(2));
- IGCTensor *output = get_backing_tensor(node.output(0));
+ GCTargetInfo::TensorType *input = get_backing_tensor<GCTargetInfo>(node.input(0));
+ GCTargetInfo::TensorType *weights = get_backing_tensor<GCTargetInfo>(node.input(1));
+ GCTargetInfo::TensorType *biases = get_backing_tensor<GCTargetInfo>(node.input(2));
+ GCTargetInfo::TensorType *output = get_backing_tensor<GCTargetInfo>(node.output(0));
if(is_data_type_quantized_asymmetric(input->info()->data_type()))
{
@@ -264,10 +175,11 @@
// Create and configure function (we assume that functions have been validated before creation)
std::unique_ptr<IFunction> func;
std::string func_name;
- if(dwc_algorithm == DepthwiseConvolutionMethod::OPTIMIZED_3x3)
+ if(dwc_algorithm == DepthwiseConvolutionMethod::Optimized3x3)
{
- std::tie(func, func_name) = create_named_function<GCDepthwiseConvolutionLayer3x3>(
- std::string("GCDepthwiseConvolutionLayer3x3"), input, weights, biases, output, conv_info);
+ std::tie(func, func_name) = create_named_function<GCDepthwiseConvolutionLayerFunctions::DepthwiseConvolutionLayer3x3>(
+ std::string("DepthwiseConvolutionLayer3x3"),
+ input, weights, biases, output, conv_info);
}
else
{
@@ -276,6 +188,7 @@
// Log info
ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
+ << " Target " << GCTargetInfo::TargetType
<< " Data Type: " << input->info()->data_type()
<< " Input QuantInfo: " << input->info()->quantization_info()
<< " Weights QuantInfo: " << weights->info()->quantization_info()
@@ -286,13 +199,8 @@
return func;
}
-/** Create a backend element-wise operation layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend element-wise operation layer function
- */
-std::unique_ptr<IFunction> create_eltwise_layer(EltwiseLayerNode &node)
+template <>
+std::unique_ptr<IFunction> create_eltwise_layer<GCEltwiseFunctions, GCTargetInfo>(EltwiseLayerNode &node)
{
ARM_COMPUTE_LOG_GRAPH_VERBOSE(
"Creating GC EltwiseLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
@@ -300,31 +208,32 @@
ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
// Extract IO and info
- IGCTensor *input1 = get_backing_tensor(node.input(0));
- IGCTensor *input2 = get_backing_tensor(node.input(1));
- IGCTensor *output = get_backing_tensor(node.output(0));
- const EltwiseOperation eltwise_op = node.eltwise_operation();
- const ConvertPolicy convert_policy = node.convert_policy();
+ GCTargetInfo::TensorType *input1 = get_backing_tensor<GCTargetInfo>(node.input(0));
+ GCTargetInfo::TensorType *input2 = get_backing_tensor<GCTargetInfo>(node.input(1));
+ GCTargetInfo::TensorType *output = get_backing_tensor<GCTargetInfo>(node.output(0));
+ const EltwiseOperation eltwise_op = node.eltwise_operation();
+ const ConvertPolicy convert_policy = node.convert_policy();
ARM_COMPUTE_ERROR_ON(input1 == nullptr);
ARM_COMPUTE_ERROR_ON(input2 == nullptr);
ARM_COMPUTE_ERROR_ON(output == nullptr);
std::unique_ptr<IFunction> func = nullptr;
std::string func_name;
- if(eltwise_op == EltwiseOperation::ADD)
+ if(eltwise_op == EltwiseOperation::Add)
{
- std::tie(func, func_name) = create_named_function<GCArithmeticAddition>(std::string("GCArithmeticAddition"),
- input1, input2, output,
- convert_policy);
+ std::tie(func, func_name) = create_named_function<GCEltwiseFunctions::Addition>(
+ std::string("GCArithmeticAddition"),
+ input1, input2, output, convert_policy);
}
- else if(eltwise_op == EltwiseOperation::SUB)
+ else if(eltwise_op == EltwiseOperation::Sub)
{
ARM_COMPUTE_ERROR("Arithmetic subtraction is not supported in GLES backend");
}
- else if(eltwise_op == EltwiseOperation::MUL)
+ else if(eltwise_op == EltwiseOperation::Mul)
{
- std::tie(func, func_name) = create_named_function<GCPixelWiseMultiplication>(
- std::string("GCPixelWiseMultiplication"), input1, input2, output, 1.f);
+ std::tie(func, func_name) = create_named_function<GCEltwiseFunctions::Multiplication>(
+ std::string("PixelWiseMultiplication"),
+ input1, input2, output, 1.f);
}
else
{
@@ -332,157 +241,16 @@
}
// Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
+ ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << node.type()
+ << " Target " << GCTargetInfo::TargetType
+ << " Operation " << func_name
<< " Data Type: " << input1->info()->data_type()
<< " Shape : " << input1->info()->tensor_shape()
<< std::endl);
return func;
}
-
-/** Create a backend fully connected layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend fully connected layer function
- */
-std::unique_ptr<IFunction> create_fully_connected_layer(FullyConnectedLayerNode &node, GraphContext &ctx)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE(
- "Creating GC FullyConnectedLayer node with ID : " << node.id() << " and Name: " << node.name()
- << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 3);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- IGCTensor *input = get_backing_tensor(node.input(0));
- IGCTensor *weights = get_backing_tensor(node.input(1));
- IGCTensor *biases = get_backing_tensor(node.input(2));
- IGCTensor *output = get_backing_tensor(node.output(0));
-
- // Create and configure function
- auto func = support::cpp14::make_unique<GCFullyConnectedLayer>(get_memory_manager(ctx, Target::GC));
- func->configure(input, weights, biases, output);
- ARM_COMPUTE_ERROR_ON(input == nullptr);
- ARM_COMPUTE_ERROR_ON(weights == nullptr);
- ARM_COMPUTE_ERROR_ON(output == nullptr);
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated GCFullyConnectedLayer"
- << " Data Type: " << input->info()->data_type()
- << " Input shape: " << input->info()->tensor_shape()
- << " Weights shape: " << weights->info()->tensor_shape()
- << " Biases Shape: " << biases->info()->tensor_shape()
- << " Output shape: " << output->info()->tensor_shape()
- << std::endl);
-
- return std::move(func);
-}
-
-/** Create a backend normalization layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend normalization layer function
- */
-std::unique_ptr<IFunction> create_normalization_layer(NormalizationLayerNode &node)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE(
- "Creating GC NormalizationLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- IGCTensor *input = get_backing_tensor(node.input(0));
- IGCTensor *output = get_backing_tensor(node.output(0));
- const NormalizationLayerInfo norm_info = node.normalization_info();
- ARM_COMPUTE_ERROR_ON(input == nullptr);
- ARM_COMPUTE_ERROR_ON(output == nullptr);
-
- // Create and configure function
- auto func = support::cpp14::make_unique<GCNormalizationLayer>();
- func->configure(input, output, norm_info);
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated GCNormalizationLayer"
- << " Data Type: " << input->info()->data_type()
- << " Input shape: " << input->info()->tensor_shape()
- << " Output shape: " << output->info()->tensor_shape()
- << " Normalization info: " << norm_info.type()
- << std::endl);
-
- return std::move(func);
-}
-
-/** Create a backend pooling layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend pooling layer function
- */
-std::unique_ptr<IFunction> create_pooling_layer(PoolingLayerNode &node)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE(
- "Creating GC PoolingLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- IGCTensor *input = get_backing_tensor(node.input(0));
- IGCTensor *output = get_backing_tensor(node.output(0));
- const PoolingLayerInfo pool_info = node.pooling_info();
- ARM_COMPUTE_ERROR_ON(input == nullptr);
- ARM_COMPUTE_ERROR_ON(output == nullptr);
-
- // Create and configure function
- auto func = support::cpp14::make_unique<GCPoolingLayer>();
- func->configure(input, output, pool_info);
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated GCPoolingLayer"
- << " Data Type: " << input->info()->data_type()
- << " Input shape: " << input->info()->tensor_shape()
- << " Output shape: " << output->info()->tensor_shape()
- << " Pooling info: " << pool_info.pool_type()
- << std::endl);
-
- return std::move(func);
-}
-
-/** Create a backend softmax layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend softmax layer function
- */
-std::unique_ptr<IFunction> create_softmax_layer(SoftmaxLayerNode &node, GraphContext &ctx)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE(
- "Creating GC SoftmaxLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- IGCTensor *input = get_backing_tensor(node.input(0));
- IGCTensor *output = get_backing_tensor(node.output(0));
- const float beta = node.beta();
- ARM_COMPUTE_ERROR_ON(input == nullptr);
- ARM_COMPUTE_ERROR_ON(output == nullptr);
-
- // Create and configure function
- auto func = support::cpp14::make_unique<GCSoftmaxLayer>(get_memory_manager(ctx, Target::CL));
- func->configure(input, output, beta);
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated GCSoftmaxLayer"
- << " Data Type: " << input->info()->data_type()
- << " Input shape: " << input->info()->tensor_shape()
- << " Output shape: " << output->info()->tensor_shape()
- << std::endl);
-
- return std::move(func);
-}
-} // namespace
+} // namespace detail
std::unique_ptr<IFunction> GCFunctionFactory::create(INode *node, GraphContext &ctx)
{
@@ -495,29 +263,31 @@
switch(type)
{
case NodeType::ActivationLayer:
- return create_activation_layer(*polymorphic_downcast<ActivationLayerNode *>(node));
+ return detail::create_activation_layer<GCActivationLayer, GCTargetInfo>(*polymorphic_downcast<ActivationLayerNode *>(node));
case NodeType::BatchNormalizationLayer:
- return create_batch_normalization_layer(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
+ return detail::create_batch_normalization_layer<GCBatchNormalizationLayer, GCTargetInfo>(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
case NodeType::ConvolutionLayer:
- return create_convolution_layer(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
- case NodeType::DepthConcatenateLayer:
- return create_depth_concatenate_layer(*polymorphic_downcast<DepthConcatenateLayerNode *>(node));
+ return detail::create_convolution_layer<GCConvolutionLayerFunctions, GCTargetInfo>(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
+ case NodeType::ConcatenateLayer:
+ return detail::create_concatenate_layer<GCDepthConcatenateLayer, GCTargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
case NodeType::DepthwiseConvolutionLayer:
- return create_depthwise_convolution_layer(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+ return detail::create_depthwise_convolution_layer<GCDepthwiseConvolutionLayerFunctions, GCTargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
case NodeType::EltwiseLayer:
- return create_eltwise_layer(*polymorphic_downcast<EltwiseLayerNode *>(node));
+ return detail::create_eltwise_layer<GCEltwiseFunctions, GCTargetInfo>(*polymorphic_downcast<EltwiseLayerNode *>(node));
case NodeType::FullyConnectedLayer:
- return create_fully_connected_layer(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
+ return detail::create_fully_connected_layer<GCFullyConnectedLayer, GCTargetInfo>(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
case NodeType::NormalizationLayer:
- return create_normalization_layer(*polymorphic_downcast<NormalizationLayerNode *>(node));
+ return detail::create_normalization_layer<GCNormalizationLayer, GCTargetInfo>(*polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
case NodeType::PoolingLayer:
- return create_pooling_layer(*polymorphic_downcast<PoolingLayerNode *>(node));
+ return detail::create_pooling_layer<GCPoolingLayer, GCTargetInfo>(*polymorphic_downcast<PoolingLayerNode *>(node));
+ case NodeType::ResizeLayer:
+ return detail::create_resize_layer<GCScale, GCTargetInfo>(*polymorphic_downcast<ResizeLayerNode *>(node));
case NodeType::SoftmaxLayer:
- return create_softmax_layer(*polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
+ return detail::create_softmax_layer<GCSoftmaxLayer, GCTargetInfo>(*polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
default:
return nullptr;
}
}
} // namespace backends
} // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
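
Unlike the CL backend, the GC factory cannot reuse the generic templates wholesale because the GLES functions take different arguments (no fast-math hint, fewer convolution methods), so it supplies explicit full specialisations of the shared detail::create_* templates. A toy illustration of the mechanism:

    #include <iostream>

    struct NodeStub {};

    // Generic primary template (in arm_compute this lives in FunctionHelpers.h).
    template <typename Functions, typename TargetInfo>
    void create_conv_layer(NodeStub &)
    {
        std::cout << "generic path\n";
    }

    struct GCFuncs {};
    struct GCInfo {};

    // Explicit full specialization: the GLES backend takes a divergent code path.
    template <>
    void create_conv_layer<GCFuncs, GCInfo>(NodeStub &)
    {
        std::cout << "GLES-specific path\n";
    }

    int main()
    {
        NodeStub n;
        create_conv_layer<int, float>(n);      // generic
        create_conv_layer<GCFuncs, GCInfo>(n); // specialized
    }
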
diff --git a/src/graph/backends/GLES/GCNodeValidator.cpp b/src/graph/backends/GLES/GCNodeValidator.cpp
index c7f7d81..53049c7 100644
--- a/src/graph/backends/GLES/GCNodeValidator.cpp
+++ b/src/graph/backends/GLES/GCNodeValidator.cpp
@@ -57,7 +57,7 @@
// Validate function
ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->tensor_shape().x() != 3 && weights->tensor_shape().y() != 3, "Unsupported depthwise convolution");
- node.set_depthwise_convolution_method(DepthwiseConvolutionMethod::OPTIMIZED_3x3);
+ node.set_depthwise_convolution_method(DepthwiseConvolutionMethod::Optimized3x3);
return Status{};
}
@@ -79,15 +79,13 @@
const ConvolutionMethod conv_algorithm = node.convolution_method();
// Validate function
- if(conv_algorithm == ConvolutionMethod::DIRECT)
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(node.num_groups() != 1, "Grouping is not supported by ConvolutionLayer!");
+ if(conv_algorithm == ConvolutionMethod::Direct)
{
bool is_square = weights->tensor_shape().x() == weights->tensor_shape().y();
bool is_direct = (weights->tensor_shape().x() == 1) || (weights->tensor_shape().x() == 3) || (weights->tensor_shape().x() == 5);
bool is_correct_stride = (conv_info.stride().first) <= 2 && (conv_info.stride().second <= 2);
- if(!(is_square && is_direct && is_correct_stride))
- {
- node.set_convolution_method(ConvolutionMethod::DEFAULT);
- }
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(is_square && is_direct && is_correct_stride), "Direct convolution is not supported for given configuration");
}
return Status{};
@@ -104,14 +102,18 @@
NodeType type = node->type();
switch(type)
{
+ case NodeType::ChannelShuffleLayer:
+ return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : ChannelShuffleLayer");
case NodeType::ConvolutionLayer:
return validate_convolution_layer(*polymorphic_downcast<ConvolutionLayerNode *>(node));
case NodeType::DepthwiseConvolutionLayer:
return validate_depthwise_convolution_layer(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
case NodeType::FlattenLayer:
- return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation");
+ return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : FlattenLayer");
+ case NodeType::PermuteLayer:
+ return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : PermuteLayer");
case NodeType::ReshapeLayer:
- return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation");
+ return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : ReshapeLayer");
default:
return Status{};
}
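
The behavioural change here is worth noting: instead of silently downgrading an unsupported direct convolution to the DEFAULT method, the GC validator now rejects the configuration outright. A compact sketch of the early-return idiom behind ARM_COMPUTE_RETURN_ERROR_ON_MSG (the macro below is a stand-in, not the library's definition):

    #include <string>

    struct Status
    {
        bool        ok;
        std::string msg;
    };

    // Stand-in for the early-return idiom: bail out of the validator with a
    // message as soon as a check fails.
    #define RETURN_ERROR_ON_MSG(cond, m) \
        do { if(cond) { return Status{ false, (m) }; } } while(false)

    Status validate_direct_conv(int kernel_w, int kernel_h, int stride)
    {
        bool is_square         = (kernel_w == kernel_h);
        bool is_supported_size = (kernel_w == 1) || (kernel_w == 3) || (kernel_w == 5);
        RETURN_ERROR_ON_MSG(!(is_square && is_supported_size && stride <= 2),
                            "Direct convolution is not supported for given configuration");
        return Status{ true, "" };
    }

    int main()
    {
        return validate_direct_conv(3, 3, 1).ok ? 0 : 1;
    }
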
diff --git a/src/graph/backends/NEON/NEDeviceBackend.cpp b/src/graph/backends/NEON/NEDeviceBackend.cpp
index 7c2db40..5fc44d0 100644
--- a/src/graph/backends/NEON/NEDeviceBackend.cpp
+++ b/src/graph/backends/NEON/NEDeviceBackend.cpp
@@ -61,6 +61,13 @@
void NEDeviceBackend::initialize_backend()
{
+ //Nothing to do
+}
+
+void NEDeviceBackend::release_backend_context(GraphContext &ctx)
+{
+ //Nothing to do
+ ARM_COMPUTE_UNUSED(ctx);
}
void NEDeviceBackend::setup_backend_context(GraphContext &ctx)
@@ -155,4 +162,4 @@
}
} // namespace backends
} // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/backends/NEON/NEFunctionFactory.cpp b/src/graph/backends/NEON/NEFunctionFactory.cpp
index 7b1c50f..36a25ad 100644
--- a/src/graph/backends/NEON/NEFunctionFactory.cpp
+++ b/src/graph/backends/NEON/NEFunctionFactory.cpp
@@ -28,6 +28,7 @@
#include "arm_compute/graph/GraphContext.h"
#include "arm_compute/graph/Logger.h"
#include "arm_compute/graph/TypePrinter.h"
+#include "arm_compute/graph/backends/FunctionHelpers.h"
#include "arm_compute/graph/backends/Utils.h"
#include "arm_compute/graph/nodes/Nodes.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
@@ -41,108 +42,53 @@
{
namespace backends
{
-namespace
+/** Target specific information structure used to pass information to the layer templates */
+struct NETargetInfo
{
-/** Returns backing tensor of a given tensor
- *
- * @param[in] tensor Tensor to extract the backing tensor from
- *
- * @return Backing tensor if present else nullptr
- */
-arm_compute::ITensor *get_backing_tensor(arm_compute::graph::Tensor *tensor)
-{
- return ((tensor == nullptr) || (tensor->handle() == nullptr)) ? nullptr : &tensor->handle()->tensor();
-}
+ using TensorType = arm_compute::ITensor;
+ static Target TargetType;
+};
-/** Create a backend activation layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend activation layer function
- */
-std::unique_ptr<IFunction> create_activation_layer(ActivationLayerNode &node)
+Target NETargetInfo::TargetType = Target::NEON;
+
+/** Collection of NEON convolution functions */
+struct NEConvolutionLayerFunctions
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON ActivationLayerNode node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
+ using GenericConvolutionLayer = NEConvolutionLayer;
+ using GEMMConvolutionLayer = NEGEMMConvolutionLayer;
+ using DirectConvolutionLayer = NEDirectConvolutionLayer;
+ using WinogradConvolutionLayer = NEWinogradConvolutionLayer;
+};
+
+/** Collection of NEON depthwise convolution functions */
+struct NEDepthwiseConvolutionLayerFunctions
+{
+ using GenericDepthwiseConvolutionLayer = NEDepthwiseConvolutionLayer;
+ using DepthwiseConvolutionLayer3x3 = NEDepthwiseConvolutionLayer3x3;
+};
+
+/** Collection of NEON element-wise functions */
+struct NEEltwiseFunctions
+{
+ using Addition = NEArithmeticAddition;
+ using Subtraction = NEArithmeticSubtraction;
+ using Multiplication = NEPixelWiseMultiplication;
+};
+
+namespace detail
+{
+// Specialize functions
+template <>
+std::unique_ptr<IFunction> create_convolution_layer<NEConvolutionLayerFunctions, NETargetInfo>(ConvolutionLayerNode &node,
+ GraphContext &ctx)
+{
+ validate_node<NETargetInfo>(node, 3 /* expected inputs */, 1 /* expected outputs */);
// Extract IO and info
- ITensor *input = get_backing_tensor(node.input(0));
- ITensor *output = get_backing_tensor(node.output(0));
- const ActivationLayerInfo act_info = node.activation_info();
-
- // Create function
- auto func = support::cpp14::make_unique<NEActivationLayer>();
- func->configure(input, output, act_info);
-
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated NEActivationLayer"
- << " Data Type: " << input->info()->data_type()
- << " Shape: " << input->info()->tensor_shape()
- << " Activation function: " << act_info.activation()
- << " a: " << act_info.a()
- << " b: " << act_info.b()
- << " InPlace : " << is_in_place_operation(input, output)
- << std::endl);
-
- return std::move(func);
-}
-
-/** Create a backend batch normalization layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend batch normalization layer function
- */
-std::unique_ptr<IFunction> create_batch_normalization_layer(BatchNormalizationLayerNode &node)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON BatchNormalization node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
-
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 5);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- ITensor *input = get_backing_tensor(node.input(0));
- ITensor *mean = get_backing_tensor(node.input(1));
- ITensor *var = get_backing_tensor(node.input(2));
- ITensor *beta = get_backing_tensor(node.input(3));
- ITensor *gamma = get_backing_tensor(node.input(4));
- ITensor *output = get_backing_tensor(node.output(0));
- const float epsilon = node.epsilon();
- const ActivationLayerInfo fused_act = node.fused_activation();
-
- // Create and configure function
- auto func = support::cpp14::make_unique<NEBatchNormalizationLayer>();
- func->configure(input, output, mean, var, beta, gamma, epsilon, fused_act);
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated NEBatchNormalizationLayer"
- << " Data Type: " << input->info()->data_type()
- << " Shape: " << input->info()->tensor_shape()
- << " Epsilon: " << epsilon << " "
- << (fused_act.enabled() ? to_string(fused_act.activation()) : "")
- << " InPlace : " << is_in_place_operation(input, output)
- << std::endl);
-
- return std::move(func);
-}
-
-/** Create a backend convolution layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend convolution layer function
- */
-std::unique_ptr<IFunction> create_convolution_layer(ConvolutionLayerNode &node, GraphContext &ctx)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON ConvolutionLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 3);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- ITensor *input = get_backing_tensor(node.input(0));
- ITensor *weights = get_backing_tensor(node.input(1));
- ITensor *biases = get_backing_tensor(node.input(2));
- ITensor *output = get_backing_tensor(node.output(0));
+ NETargetInfo::TensorType *input = get_backing_tensor<NETargetInfo>(node.input(0));
+ NETargetInfo::TensorType *weights = get_backing_tensor<NETargetInfo>(node.input(1));
+ NETargetInfo::TensorType *biases = get_backing_tensor<NETargetInfo>(node.input(2));
+ NETargetInfo::TensorType *output = get_backing_tensor<NETargetInfo>(node.output(0));
if(is_data_type_quantized_asymmetric(input->info()->data_type()))
{
@@ -156,29 +102,30 @@
std::shared_ptr<IMemoryManager> mm = get_memory_manager(ctx, Target::NEON);
std::unique_ptr<IFunction> func;
std::string func_name;
- if(conv_algorithm == ConvolutionMethod::DIRECT)
+ if(conv_algorithm == ConvolutionMethod::Direct)
{
- std::tie(func, func_name) = create_named_memory_managed_function<NEDirectConvolutionLayer>(std::string("NEDirectConvolutionLayer"), mm,
- input, weights, biases, output, conv_info);
+ std::tie(func, func_name) = create_named_memory_managed_function<NEDirectConvolutionLayer>(
+ std::string("DirectConvolutionLayer"), mm, input, weights, biases, output, conv_info);
}
else if(conv_algorithm == ConvolutionMethod::GEMM)
{
- std::tie(func, func_name) = create_named_memory_managed_function<NEGEMMConvolutionLayer>(std::string("NEGEMMConvolutionLayer"), mm,
- input, weights, biases, output, conv_info);
+ std::tie(func, func_name) = create_named_memory_managed_function<NEGEMMConvolutionLayer>(
+ std::string("GEMMConvolutionLayer"), mm, input, weights, biases, output, conv_info);
}
- else if(conv_algorithm == ConvolutionMethod::WINOGRAD)
+ else if(conv_algorithm == ConvolutionMethod::Winograd)
{
- std::tie(func, func_name) = create_named_memory_managed_function<NEWinogradConvolutionLayer>(std::string("NEWinogradConvolutionLayer"), mm,
- input, weights, biases, output, conv_info);
+ std::tie(func, func_name) = create_named_memory_managed_function<NEWinogradConvolutionLayer>(
+ std::string("WinogradConvolutionLayer"), mm, input, weights, biases, output, conv_info);
}
else
{
- std::tie(func, func_name) = create_named_memory_managed_function<NEConvolutionLayer>(std::string("NEConvolutionLayer"), mm,
- input, weights, biases, output, conv_info);
+ std::tie(func, func_name) = create_named_memory_managed_function<NEConvolutionLayer>(
+ std::string("ConvolutionLayer"), mm, input, weights, biases, output, conv_info);
}
// Log info
ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
+ << " Target " << NETargetInfo::TargetType
<< " Data Type: " << input->info()->data_type()
<< " Input QuantInfo: " << input->info()->quantization_info()
<< " Weights QuantInfo: " << weights->info()->quantization_info()
@@ -189,244 +136,25 @@
return func;
}
-/** Create a backend layer depth concatenate function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend depth concatenate layer function
- */
-std::unique_ptr<arm_compute::IFunction> create_depth_concatenate_layer(DepthConcatenateLayerNode &node)
+template <>
+std::unique_ptr<IFunction> create_normalization_layer<NENormalizationLayer, NETargetInfo>(NormalizationLayerNode &node, GraphContext &ctx)
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON DepthConcatenate node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Return nullptr if depth concatenate is switched off
- if(!node.is_enabled())
- {
- return nullptr;
- }
+ validate_node<NETargetInfo>(node, 1 /* expected inputs */, 1 /* expected outputs */);
// Extract IO and info
- std::vector<arm_compute::ITensor *> inputs;
- for(unsigned int i = 0; i < node.num_inputs(); ++i)
- {
- inputs.push_back(get_backing_tensor(node.input(i)));
- }
- ITensor *output = get_backing_tensor(node.output(0));
-
- // Create and configure function
- auto func = support::cpp14::make_unique<NEDepthConcatenateLayer>();
- func->configure(inputs, output);
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated NEDepthConcatenateLayer"
- << " Data Type: " << output->info()->data_type()
- << " Shape: " << output->info()->tensor_shape()
- << " Num Inputs: " << inputs.size()
- << std::endl);
-
- return std::move(func);
-}
-
-/** Create a backend layer depth-wise convolution function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend depth-wise convolution layer function
- */
-std::unique_ptr<IFunction> create_depthwise_convolution_layer(DepthwiseConvolutionLayerNode &node)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON DepthwiseConvolutionLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 3);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- ITensor *input = get_backing_tensor(node.input(0));
- ITensor *weights = get_backing_tensor(node.input(1));
- ITensor *biases = get_backing_tensor(node.input(2));
- ITensor *output = get_backing_tensor(node.output(0));
-
- if(is_data_type_quantized_asymmetric(input->info()->data_type()))
- {
- biases->info()->set_data_type(DataType::S32);
- }
-
- const PadStrideInfo conv_info = node.convolution_info();
- const DepthwiseConvolutionMethod dwc_algorithm = node.depthwise_convolution_method();
-
- // Create and configure function (we assume that functions have been validated before creation)
- std::unique_ptr<IFunction> func;
- std::string func_name;
- if(dwc_algorithm == DepthwiseConvolutionMethod::OPTIMIZED_3x3)
- {
- std::tie(func, func_name) = create_named_function<NEDepthwiseConvolutionLayer3x3>(std::string("NEDepthwiseConvolutionLayer3x3"),
- input, weights, biases, output, conv_info);
- }
- else
- {
- std::tie(func, func_name) = create_named_function<NEDepthwiseConvolutionLayer>(std::string("NEDepthwiseConvolutionLayer"),
- input, weights, biases, output, conv_info);
- }
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
- << " Data Type: " << input->info()->data_type()
- << " Input QuantInfo: " << input->info()->quantization_info()
- << " Weights QuantInfo: " << weights->info()->quantization_info()
- << " Input shape: " << input->info()->tensor_shape()
- << " Weights shape: " << weights->info()->tensor_shape()
- << " Output shape: " << output->info()->tensor_shape()
- << std::endl);
- return func;
-}
-
-/** Create a backend element-wise operation layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend element-wise operation layer function
- */
-std::unique_ptr<IFunction> create_eltwise_layer(EltwiseLayerNode &node)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON EltwiseLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 2);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- ITensor *input1 = get_backing_tensor(node.input(0));
- ITensor *input2 = get_backing_tensor(node.input(1));
- ITensor *output = get_backing_tensor(node.output(0));
- const EltwiseOperation eltwise_op = node.eltwise_operation();
- const ConvertPolicy convert_policy = node.convert_policy();
- ARM_COMPUTE_ERROR_ON(input1 == nullptr);
- ARM_COMPUTE_ERROR_ON(input2 == nullptr);
- ARM_COMPUTE_ERROR_ON(output == nullptr);
-
- std::unique_ptr<IFunction> func = nullptr;
- std::string func_name;
- if(eltwise_op == EltwiseOperation::ADD)
- {
- std::tie(func, func_name) = create_named_function<NEArithmeticAddition>(std::string("NEArithmeticAddition"),
- input1, input2, output, convert_policy);
- }
- else if(eltwise_op == EltwiseOperation::SUB)
- {
- std::tie(func, func_name) = create_named_function<NEArithmeticSubtraction>(std::string("NEArithmeticSubtraction"),
- input1, input2, output, convert_policy);
- }
- else if(eltwise_op == EltwiseOperation::MUL)
- {
- std::tie(func, func_name) = create_named_function<NEPixelWiseMultiplication>(std::string("NEPixelWiseMultiplication"),
- input1, input2, output, 1.f,
- convert_policy, node.rounding_policy());
- }
- else
- {
- ARM_COMPUTE_ERROR("Unsupported element-wise operation!");
- }
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << func_name
- << " Data Type: " << input1->info()->data_type()
- << " Shape : " << input1->info()->tensor_shape()
- << std::endl);
-
- return func;
-}
-
-/** Create a backend flatten layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend flatten layer function
- */
-std::unique_ptr<IFunction> create_flatten_layer(FlattenLayerNode &node)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON FlattenLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- ITensor *input = get_backing_tensor(node.input(0));
- ITensor *output = get_backing_tensor(node.output(0));
-
- // Create and configure function
- auto func = support::cpp14::make_unique<NEFlattenLayer>();
- func->configure(input, output);
- ARM_COMPUTE_ERROR_ON(input == nullptr);
- ARM_COMPUTE_ERROR_ON(output == nullptr);
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated NEFlattenLayer"
- << " Data Type: " << input->info()->data_type()
- << " Input shape: " << input->info()->tensor_shape()
- << " Output shape: " << output->info()->tensor_shape()
- << std::endl);
-
- return std::move(func);
-}
-
-/** Create a backend fully connected layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend fully connected layer function
- */
-std::unique_ptr<IFunction> create_fully_connected_layer(FullyConnectedLayerNode &node, GraphContext &ctx)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON FullyConnectedLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 3);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- ITensor *input = get_backing_tensor(node.input(0));
- ITensor *weights = get_backing_tensor(node.input(1));
- ITensor *biases = get_backing_tensor(node.input(2));
- ITensor *output = get_backing_tensor(node.output(0));
-
- // Create and configure function
- auto func = support::cpp14::make_unique<NEFullyConnectedLayer>(get_memory_manager(ctx, Target::NEON));
- func->configure(input, weights, biases, output);
- ARM_COMPUTE_ERROR_ON(input == nullptr);
- ARM_COMPUTE_ERROR_ON(weights == nullptr);
- ARM_COMPUTE_ERROR_ON(output == nullptr);
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated NEFullyConnectedLayer"
- << " Data Type: " << input->info()->data_type()
- << " Input shape: " << input->info()->tensor_shape()
- << " Weights shape: " << weights->info()->tensor_shape()
- << " Output shape: " << output->info()->tensor_shape()
- << std::endl);
-
- return std::move(func);
-}
-
-/** Create a backend normalization layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend normalization layer function
- */
-std::unique_ptr<IFunction> create_normalization_layer(NormalizationLayerNode &node, GraphContext &ctx)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON NormalizationLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- ITensor *input = get_backing_tensor(node.input(0));
- ITensor *output = get_backing_tensor(node.output(0));
+ NETargetInfo::TensorType *input = get_backing_tensor<NETargetInfo>(node.input(0));
+ NETargetInfo::TensorType *output = get_backing_tensor<NETargetInfo>(node.output(0));
const NormalizationLayerInfo norm_info = node.normalization_info();
ARM_COMPUTE_ERROR_ON(input == nullptr);
ARM_COMPUTE_ERROR_ON(output == nullptr);
// Create and configure function
- auto func = support::cpp14::make_unique<NENormalizationLayer>(get_memory_manager(ctx, Target::NEON));
+ auto func = support::cpp14::make_unique<NENormalizationLayer>(get_memory_manager(ctx, NETargetInfo::TargetType));
func->configure(input, output, norm_info);
// Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated NENormalizationLayer"
+ ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated " << node.type()
+ << " Target " << NETargetInfo::TargetType
<< " Data Type: " << input->info()->data_type()
<< " Input shape: " << input->info()->tensor_shape()
<< " Output shape: " << output->info()->tensor_shape()
@@ -435,106 +163,7 @@
return std::move(func);
}
-
-/** Create a backend pooling layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend pooling layer function
- */
-std::unique_ptr<IFunction> create_pooling_layer(PoolingLayerNode &node)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON PoolingLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- ITensor *input = get_backing_tensor(node.input(0));
- ITensor *output = get_backing_tensor(node.output(0));
- const PoolingLayerInfo pool_info = node.pooling_info();
- ARM_COMPUTE_ERROR_ON(input == nullptr);
- ARM_COMPUTE_ERROR_ON(output == nullptr);
-
- // Create and configure function
- auto func = support::cpp14::make_unique<NEPoolingLayer>();
- func->configure(input, output, pool_info);
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated NEPoolingLayer"
- << " Data Type: " << input->info()->data_type()
- << " Input shape: " << input->info()->tensor_shape()
- << " Output shape: " << output->info()->tensor_shape()
- << " Pooling info: " << pool_info.pool_type()
- << std::endl);
-
- return std::move(func);
-}
-
-/** Create a backend reshape layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend reshape layer function
- */
-std::unique_ptr<IFunction> create_reshape_layer(ReshapeLayerNode &node)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON ReshapeLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- ITensor *input = get_backing_tensor(node.input(0));
- ITensor *output = get_backing_tensor(node.output(0));
- ARM_COMPUTE_ERROR_ON(input == nullptr);
- ARM_COMPUTE_ERROR_ON(output == nullptr);
-
- // Create and configure function
- auto func = support::cpp14::make_unique<NEReshapeLayer>();
- func->configure(input, output);
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated NEReshapeLayer"
- << " Data Type: " << input->info()->data_type()
- << " Input shape: " << input->info()->tensor_shape()
- << " Output shape: " << output->info()->tensor_shape()
- << std::endl);
-
- return std::move(func);
-}
-
-/** Create a backend softmax layer function
- *
- * @param[in] node Node to create the backend function for
- *
- * @return Backend softmax layer function
- */
-std::unique_ptr<IFunction> create_softmax_layer(SoftmaxLayerNode &node, GraphContext &ctx)
-{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Creating NEON SoftmaxLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
- ARM_COMPUTE_ERROR_ON(node.num_inputs() != 1);
- ARM_COMPUTE_ERROR_ON(node.num_outputs() != 1);
-
- // Extract IO and info
- ITensor *input = get_backing_tensor(node.input(0));
- ITensor *output = get_backing_tensor(node.output(0));
- const float beta = node.beta();
- ARM_COMPUTE_ERROR_ON(input == nullptr);
- ARM_COMPUTE_ERROR_ON(output == nullptr);
-
- // Create and configure function
- auto func = support::cpp14::make_unique<NESoftmaxLayer>(get_memory_manager(ctx, Target::NEON));
- func->configure(input, output, beta);
-
- // Log info
- ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated NESoftmaxLayer"
- << " Data Type: " << input->info()->data_type()
- << " Input shape: " << input->info()->tensor_shape()
- << " Output shape: " << output->info()->tensor_shape()
- << std::endl);
-
- return std::move(func);
-}
-} // namespace
+} // namespace detail
std::unique_ptr<IFunction> NEFunctionFactory::create(INode *node, GraphContext &ctx)
{
@@ -547,33 +176,39 @@
switch(type)
{
case NodeType::ActivationLayer:
- return create_activation_layer(*polymorphic_downcast<ActivationLayerNode *>(node));
+ return detail::create_activation_layer<NEActivationLayer, NETargetInfo>(*polymorphic_downcast<ActivationLayerNode *>(node));
case NodeType::BatchNormalizationLayer:
- return create_batch_normalization_layer(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
+ return detail::create_batch_normalization_layer<NEBatchNormalizationLayer, NETargetInfo>(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
case NodeType::ConvolutionLayer:
- return create_convolution_layer(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
- case NodeType::DepthConcatenateLayer:
- return create_depth_concatenate_layer(*polymorphic_downcast<DepthConcatenateLayerNode *>(node));
+ return detail::create_convolution_layer<NEConvolutionLayerFunctions, NETargetInfo>(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
+ case NodeType::DeconvolutionLayer:
+ return detail::create_deconvolution_layer<NEDeconvolutionLayer, NETargetInfo>(*polymorphic_downcast<DeconvolutionLayerNode *>(node), ctx);
+ case NodeType::ConcatenateLayer:
+ return detail::create_concatenate_layer<NEConcatenateLayer, NETargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
case NodeType::DepthwiseConvolutionLayer:
- return create_depthwise_convolution_layer(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+ return detail::create_depthwise_convolution_layer<NEDepthwiseConvolutionLayerFunctions, NETargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
case NodeType::EltwiseLayer:
- return create_eltwise_layer(*polymorphic_downcast<EltwiseLayerNode *>(node));
+ return detail::create_eltwise_layer<NEEltwiseFunctions, NETargetInfo>(*polymorphic_downcast<EltwiseLayerNode *>(node));
case NodeType::FlattenLayer:
- return create_flatten_layer(*polymorphic_downcast<FlattenLayerNode *>(node));
+ return detail::create_flatten_layer<NEFlattenLayer, NETargetInfo>(*polymorphic_downcast<FlattenLayerNode *>(node));
case NodeType::FullyConnectedLayer:
- return create_fully_connected_layer(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
+ return detail::create_fully_connected_layer<NEFullyConnectedLayer, NETargetInfo>(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
case NodeType::NormalizationLayer:
- return create_normalization_layer(*polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
+ return detail::create_normalization_layer<NENormalizationLayer, NETargetInfo>(*polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
+ case NodeType::PermuteLayer:
+ return detail::create_permute_layer<NEPermute, NETargetInfo>(*polymorphic_downcast<PermuteLayerNode *>(node));
case NodeType::PoolingLayer:
- return create_pooling_layer(*polymorphic_downcast<PoolingLayerNode *>(node));
+ return detail::create_pooling_layer<NEPoolingLayer, NETargetInfo>(*polymorphic_downcast<PoolingLayerNode *>(node));
case NodeType::ReshapeLayer:
- return create_reshape_layer(*polymorphic_downcast<ReshapeLayerNode *>(node));
+ return detail::create_reshape_layer<NEReshapeLayer, NETargetInfo>(*polymorphic_downcast<ReshapeLayerNode *>(node));
+ case NodeType::ResizeLayer:
+ return detail::create_resize_layer<NEScale, NETargetInfo>(*polymorphic_downcast<ResizeLayerNode *>(node));
case NodeType::SoftmaxLayer:
- return create_softmax_layer(*polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
+ return detail::create_softmax_layer<NESoftmaxLayer, NETargetInfo>(*polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
default:
return nullptr;
}
}
} // namespace backends
} // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
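
The NEON factory keeps explicit specialisations only where the generic template does not fit, e.g. normalization needs the GraphContext to obtain a memory manager. A guess at the shape of the create_named_memory_managed_function helper used throughout this file, again with stand-in types:

    #include <iostream>
    #include <memory>
    #include <string>
    #include <tuple>

    struct IFunction { virtual ~IFunction() = default; virtual void run() = 0; };
    struct IMemoryManager {};

    // Construct the function with a shared memory manager, configure it, and
    // hand back the function together with a printable name for logging.
    template <typename FunctionType, typename... Args>
    std::tuple<std::unique_ptr<IFunction>, std::string>
    create_named_memory_managed_function(std::string name,
                                         std::shared_ptr<IMemoryManager> mm,
                                         Args &&... args)
    {
        auto func = std::make_unique<FunctionType>(std::move(mm));
        func->configure(std::forward<Args>(args)...);
        return std::make_tuple(std::move(func), std::move(name));
    }

    struct MyGEMMConv : IFunction
    {
        explicit MyGEMMConv(std::shared_ptr<IMemoryManager>) {}
        void configure(int /*pad*/, int /*stride*/) {}
        void run() override { std::cout << "run\n"; }
    };

    int main()
    {
        auto mm = std::make_shared<IMemoryManager>();
        std::unique_ptr<IFunction> func;
        std::string name;
        std::tie(func, name) = create_named_memory_managed_function<MyGEMMConv>(
            std::string("GEMMConvolutionLayer"), mm, /*pad=*/0, /*stride=*/1);
        std::cout << "Instantiated " << name << "\n";
        func->run();
    }
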
diff --git a/src/graph/backends/NEON/NENodeValidator.cpp b/src/graph/backends/NEON/NENodeValidator.cpp
index e438e79..58ffaf0 100644
--- a/src/graph/backends/NEON/NENodeValidator.cpp
+++ b/src/graph/backends/NEON/NENodeValidator.cpp
@@ -47,6 +47,8 @@
NodeType type = node->type();
switch(type)
{
+ case NodeType::ChannelShuffleLayer:
+ return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : ChannelShuffleLayer");
case NodeType::ConvolutionLayer:
return detail::validate_convolution_layer<NEConvolutionLayer,
NEDirectConvolutionLayer,
@@ -55,7 +57,8 @@
case NodeType::DepthwiseConvolutionLayer:
return detail::validate_depthwise_convolution_layer<NEDepthwiseConvolutionLayer,
NEDepthwiseConvolutionLayer3x3>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
-
+ case NodeType::PermuteLayer:
+ return detail::validate_permute_layer<NEPermute>(*polymorphic_downcast<PermuteLayerNode *>(node));
default:
return Status{};
}
diff --git a/src/graph/detail/ExecutionHelpers.cpp b/src/graph/detail/ExecutionHelpers.cpp
index c370fdf..f479963 100644
--- a/src/graph/detail/ExecutionHelpers.cpp
+++ b/src/graph/detail/ExecutionHelpers.cpp
@@ -35,14 +35,6 @@
{
namespace detail
{
-void default_initialize_backends()
-{
- for(const auto &backend : backends::BackendRegistry::get().backends())
- {
- backend.second->initialize_backend();
- }
-}
-
void validate_all_nodes(Graph &g)
{
auto &nodes = g.nodes();
@@ -52,10 +44,9 @@
{
if(node != nullptr)
{
- Target assigned_target = node->assigned_target();
- auto backend = backends::BackendRegistry::get().find_backend(assigned_target);
- ARM_COMPUTE_ERROR_ON_MSG(!backend, "Requested backend doesn't exist!");
- Status status = backend->validate_node(*node);
+ Target assigned_target = node->assigned_target();
+ backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(assigned_target);
+ Status status = backend.validate_node(*node);
ARM_COMPUTE_ERROR_ON_MSG(!bool(status), status.error_description().c_str());
}
}
@@ -67,13 +58,12 @@
for(auto &tensor : tensors)
{
- if(tensor)
+ if(tensor && tensor->handle() == nullptr)
{
- Target target = tensor->desc().target;
- auto backend = backends::BackendRegistry::get().find_backend(target);
- ARM_COMPUTE_ERROR_ON_MSG(!backend, "Requested backend doesn't exist!");
- auto handle = backend->create_tensor(*tensor);
- ARM_COMPUTE_ERROR_ON_MSG(!backend, "Couldn't create backend handle!");
+ Target target = tensor->desc().target;
+ backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(target);
+ std::unique_ptr<ITensorHandle> handle = backend.create_tensor(*tensor);
+ ARM_COMPUTE_ERROR_ON_MSG(!handle, "Couldn't create backend handle!");
tensor->set_handle(std::move(handle));
}
}
@@ -139,35 +129,33 @@
}
}
-ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx)
+ExecutionWorkload configure_all_nodes(Graph &g, GraphContext &ctx, const std::vector<NodeID> &node_order)
{
ExecutionWorkload workload;
workload.graph = &g;
workload.ctx = &ctx;
- auto &nodes = g.nodes();
-
// Create tasks
- for(auto &node : nodes)
+ for(auto &node_id : node_order)
{
+ auto node = g.node(node_id);
if(node != nullptr)
{
- Target assigned_target = node->assigned_target();
- auto backend = backends::BackendRegistry::get().find_backend(assigned_target);
- ARM_COMPUTE_ERROR_ON_MSG(!backend, "Requested backend doesn't exist!");
- auto func = backend->configure_node(*node, ctx);
+ Target assigned_target = node->assigned_target();
+ backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(assigned_target);
+ std::unique_ptr<IFunction> func = backend.configure_node(*node, ctx);
if(func != nullptr)
{
ExecutionTask task;
task.task = std::move(func);
- task.node = node.get();
+ task.node = node;
workload.tasks.push_back(std::move(task));
}
}
}
// Add inputs and outputs
- for(auto &node : nodes)
+ for(auto &node : g.nodes())
{
if(node != nullptr && node->type() == NodeType::Input)
{
@@ -214,15 +202,12 @@
}
}
-void call_all_input_node_accessors(ExecutionWorkload &workload)
+bool call_all_input_node_accessors(ExecutionWorkload &workload)
{
- for(auto &input : workload.inputs)
+ return !std::any_of(std::begin(workload.inputs), std::end(workload.inputs), [](Tensor * input_tensor)
{
- if(input != nullptr)
- {
- input->call_accessor();
- }
- }
+ return (input_tensor == nullptr) || !input_tensor->call_accessor();
+ });
}
void prepare_all_tasks(ExecutionWorkload &workload)
@@ -264,16 +249,16 @@
}
}
-void call_all_output_node_accessors(ExecutionWorkload &workload)
+bool call_all_output_node_accessors(ExecutionWorkload &workload)
{
- for(auto &output : workload.outputs)
+ bool is_valid = true;
+ std::for_each(std::begin(workload.outputs), std::end(workload.outputs), [&](Tensor * output_tensor)
{
- if(output != nullptr)
- {
- output->call_accessor();
- }
- }
+ is_valid = is_valid && (output_tensor != nullptr) && output_tensor->call_accessor();
+ });
+
+ return is_valid;
}
} // namespace detail
} // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
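
With the new bool signatures above, the input side fails fast: std::any_of stops at the first null tensor or accessor that declines, so later input accessors are not invoked at all. The output side iterates every tensor but, through the short-circuiting && chain, likewise stops calling accessors after the first failure. A self-contained model of the input-side predicate (toy types, same logic):

    #include <algorithm>
    #include <vector>

    // Toy tensor whose accessor reports whether more data was provided.
    struct Tensor
    {
        bool more = true;
        bool call_accessor() { return more; }
    };

    // Same shape as the std::any_of form above: false as soon as one input
    // is null or its accessor declines to supply data.
    bool call_all_input_accessors(std::vector<Tensor *> &inputs)
    {
        return !std::any_of(inputs.begin(), inputs.end(), [](Tensor *t)
        {
            return (t == nullptr) || !t->call_accessor();
        });
    }
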
diff --git a/src/graph/frontend/Stream.cpp b/src/graph/frontend/Stream.cpp
index 96a166c..878d688 100644
--- a/src/graph/frontend/Stream.cpp
+++ b/src/graph/frontend/Stream.cpp
@@ -33,7 +33,7 @@
namespace frontend
{
Stream::Stream(size_t id, std::string name)
- : _manager(), _ctx(), _g(id, std::move(name))
+ : _ctx(), _manager(), _g(id, std::move(name))
{
}
@@ -66,4 +66,4 @@
}
} // namespace frontend
} // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
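
The Stream constructor fix above is about C++ member-initialization order: members are always constructed in declaration order (_ctx before _manager) no matter how the initializer list is written, so listing _manager first was misleading at best (the pattern -Wreorder warns about) and dangerous if _manager ever came to depend on _ctx. A minimal self-contained illustration with hypothetical types:

    struct Ctx
    {
    };

    struct Manager
    {
        explicit Manager(Ctx &c) : ctx(c) {}
        Ctx &ctx;
    };

    struct Stream
    {
        Ctx     _ctx;     // declared first, therefore constructed first
        Manager _manager; // may safely reference _ctx during construction
        // Writing ": _manager(_ctx), _ctx()" would still construct _ctx first;
        // matching the declaration order keeps the code honest.
        Stream() : _ctx(), _manager(_ctx) {}
    };
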
diff --git a/src/graph/mutators/DepthConcatSubTensorMutator.cpp b/src/graph/mutators/DepthConcatSubTensorMutator.cpp
index c56f4c5..a170c4d 100644
--- a/src/graph/mutators/DepthConcatSubTensorMutator.cpp
+++ b/src/graph/mutators/DepthConcatSubTensorMutator.cpp
@@ -25,8 +25,10 @@
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/Utils.h"
+#include "arm_compute/graph/algorithms/TopologicalSort.h"
#include "arm_compute/graph/backends/BackendRegistry.h"
-#include "arm_compute/graph/nodes/DepthConcatenateLayerNode.h"
+#include "arm_compute/graph/nodes/ConcatenateLayerNode.h"
#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/core/utils/misc/Iterable.h"
@@ -42,14 +44,31 @@
void DepthConcatSubTensorMutator::mutate(Graph &g)
{
- // Should be in reverse order of execution
- for(auto &node : arm_compute::utils::iterable::reverse_iterate(g.nodes()))
+ // Early exit if no Concatenation layers exist in graph
+ if(g.nodes(NodeType::ConcatenateLayer).empty())
{
- if(node && node->type() == NodeType::DepthConcatenateLayer && node->output(0) != nullptr)
+ return;
+ }
+
+ // Perform topological sort
+ std::vector<NodeID> topological_sorted_node_ids = dfs(g);
+
+ // Should be in reverse order of execution
+ for(auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids))
+ {
+ INode *node = g.node(node_id);
+ if(node != nullptr && node->type() == NodeType::ConcatenateLayer && node->output(0) != nullptr)
{
// Get output tensor
auto output_tensor = node->output(0);
+            // Check concatenation axis (sub-tensor optimization is supported only for concatenation axis >= 2)
+ auto *concat_node = arm_compute::utils::cast::polymorphic_downcast<ConcatenateLayerNode *>(node);
+ if(output_tensor == nullptr || get_dimension_idx(output_tensor->desc(), concat_node->concatenation_axis()) < 2)
+ {
+ continue;
+ }
+
            // Check that all tensors have the same target and valid inputs
bool is_valid = std::all_of(node->input_edges().cbegin(), node->input_edges().cend(),
[&](const EdgeID & eid)
@@ -58,7 +77,7 @@
});
// Create subtensors
- if(is_valid && backends::BackendRegistry::get().find_backend(output_tensor->desc().target) != nullptr)
+ if(is_valid && is_target_supported(output_tensor->desc().target))
{
ARM_COMPUTE_LOG_GRAPH_VERBOSE("Using sub-tensors for the node with ID : "
<< node->id() << " and name : " << node->name() << std::endl);
@@ -69,14 +88,14 @@
auto input_tensor = node->input(i);
const auto input_shape = input_tensor->desc().shape;
- auto backend = backends::BackendRegistry::get().find_backend(input_tensor->desc().target);
- auto handle = backend->create_subtensor(output_tensor->handle(), input_shape, Coordinates(0, 0, depth), false);
+ backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(input_tensor->desc().target);
+ std::unique_ptr<ITensorHandle> handle = backend.create_subtensor(output_tensor->handle(), input_shape, Coordinates(0, 0, depth), false);
input_tensor->set_handle(std::move(handle));
depth += input_shape.z();
}
- auto *dc_node = arm_compute::utils::cast::polymorphic_downcast<DepthConcatenateLayerNode *>(node.get());
+ auto *dc_node = arm_compute::utils::cast::polymorphic_downcast<ConcatenateLayerNode *>(node);
dc_node->set_enabled(false);
}
}
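
The traversal change above matters for nested concatenations: walking an explicit dfs(g) order in reverse visits every consumer before its producers, so when a concatenation's input is itself a concatenation, the inner node's output already has a backend handle to carve sub-tensors out of. The pattern, reduced to a self-contained sketch:

    #include <vector>

    using NodeID = unsigned int;

    // Visit a precomputed topological order back-to-front (toy version of
    // dfs(g) + reverse_iterate above): consumers before producers.
    template <typename Visitor>
    void for_each_in_reverse_execution_order(const std::vector<NodeID> &topo, Visitor &&visit)
    {
        for(auto it = topo.rbegin(); it != topo.rend(); ++it)
        {
            visit(*it);
        }
    }
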
diff --git a/src/graph/mutators/GroupedConvolutionMutator.cpp b/src/graph/mutators/GroupedConvolutionMutator.cpp
new file mode 100644
index 0000000..0d65d6a
--- /dev/null
+++ b/src/graph/mutators/GroupedConvolutionMutator.cpp
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/mutators/GroupedConvolutionMutator.h"
+
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/GraphBuilder.h"
+#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/Utils.h"
+#include "arm_compute/graph/backends/BackendRegistry.h"
+#include "arm_compute/graph/nodes/Nodes.h"
+
+#include "arm_compute/core/utils/misc/Cast.h"
+
+#include <set>
+
+namespace arm_compute
+{
+namespace graph
+{
+namespace
+{
+NodeID create_grouped_convolution(Graph &g, const NodeParams ¶ms, NodeIdxPair input, NodeID weights, NodeID bias,
+ PadStrideInfo conv_info, ConvolutionMethod method, FastMathHint fast_math_hint, unsigned int num_groups)
+{
+ bool has_bias = (bias != EmptyNodeID);
+
+ // Split input
+ const TensorDescriptor input_tensor_desc = get_tensor_descriptor(g, g.node(input.node_id)->outputs()[0]);
+ const unsigned int input_idx = get_dimension_idx(input_tensor_desc, DataLayoutDimension::CHANNEL);
+ NodeID input_split = GraphBuilder::add_split_node(g, params, input, num_groups, input_idx);
+
+ // Split weights
+ const TensorDescriptor weights_tensor_desc = get_tensor_descriptor(g, g.node(weights)->outputs()[0]);
+ const unsigned int batch_idx = get_dimension_idx(weights_tensor_desc, DataLayoutDimension::BATCHES);
+ NodeID weights_split = GraphBuilder::add_split_node(g, params, { weights, 0 }, num_groups, batch_idx);
+
+ // Split bias
+ NodeID bias_split = EmptyNodeID;
+ if(has_bias)
+ {
+ // Split bias
+ bias_split = GraphBuilder::add_split_node(g, params, { bias, 0 }, num_groups, 0);
+ }
+
+ std::vector<NodeIdxPair> convolution_outputs;
+ for(unsigned int i = 0; i < num_groups; ++i)
+ {
+ NodeParams group_params = params;
+ NodeID conv_nid = g.add_node<ConvolutionLayerNode>(conv_info, 1, method, fast_math_hint);
+ g.add_connection(input_split, i, conv_nid, 0);
+ g.add_connection(weights_split, i, conv_nid, 1);
+ if(has_bias)
+ {
+ g.add_connection(bias_split, i, conv_nid, 2);
+ }
+
+ // Add group name
+ if(!group_params.name.empty())
+ {
+ group_params.name.append("_g" + arm_compute::support::cpp11::to_string(i));
+ }
+
+ // Set node parameters
+ INode *node = g.node(conv_nid);
+ ARM_COMPUTE_ERROR_ON(node == nullptr);
+ node->set_common_node_parameters(group_params);
+
+ convolution_outputs.push_back({ conv_nid, 0 });
+ }
+
+ // Depth concatenate output
+ return GraphBuilder::add_concatenate_node(g, params, convolution_outputs, DataLayoutDimension::CHANNEL);
+}
+} // namespace
+
+const char *GroupedConvolutionMutator::name()
+{
+ return "GroupedConvolutionMutator";
+}
+
+void GroupedConvolutionMutator::mutate(Graph &g)
+{
+ // Early exit if no Convolution layers exist in graph
+ if(g.nodes(NodeType::ConvolutionLayer).empty())
+ {
+ return;
+ }
+
+ // Total nodes
+ size_t total_nodes = g.nodes().size();
+
+ // Iterate over convolution nodes
+ for(unsigned int i = 0; i < total_nodes; ++i)
+ {
+ INode *node = g.node(i);
+ if(node != nullptr && node->type() == NodeType::ConvolutionLayer && arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node)->num_groups() != 1)
+ {
+ // Validate node
+ backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(node->assigned_target());
+ Status status = backend.validate_node(*node);
+
+ // If grouped convolution is not supported
+ if(!bool(status))
+ {
+ // Down-cast node
+ auto *conv_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(node);
+
+ // Get internal convolution info
+ const PadStrideInfo conv_info = conv_node->convolution_info();
+ const ConvolutionMethod conv_method = conv_node->convolution_method();
+ const FastMathHint fast_math_hint = conv_node->fast_math_hint();
+ const unsigned int num_groups = conv_node->num_groups();
+ const NodeParams params = conv_node->common_node_params();
+ const Target assigned_target = conv_node->assigned_target();
+
+ // Extract node ids
+ const NodeID input_id = conv_node->input_id(0);
+ const NodeID weights_id = conv_node->input_id(1);
+ const NodeID bias_id = conv_node->input_id(2);
+
+ // Get driving nodes
+ std::vector<NodeIdxPair> driving_nodes = get_driving_nodes(*node);
+
+                // Extract accessor of the convolution node's output, if any
+ auto node_accessor = conv_node->output(0)->extract_accessor();
+
+ // Current max tensor and node id
+ TensorID latest_tid = g.tensors().size();
+ NodeID latest_nid = g.nodes().size();
+
+ // Create grouped convolution node
+ NodeID grouped_conv_id = create_grouped_convolution(g, params, { input_id, 0 }, weights_id, bias_id,
+ conv_info, conv_method, fast_math_hint, num_groups);
+
+ // Remove convolution node
+ g.remove_node(node->id());
+
+                // Update grouped convolution node outputs
+ for(auto &driving_node : driving_nodes)
+ {
+ g.add_connection(grouped_conv_id, 0, driving_node.node_id, driving_node.index);
+ }
+
+                // Update accessor of the grouped convolution node output
+ g.node(grouped_conv_id)->output(0)->set_accessor(std::move(node_accessor));
+
+ // Configure new tensors and nodes
+ std::for_each(g.tensors().begin() + latest_tid, g.tensors().end(), [](std::unique_ptr<Tensor> &t)
+ {
+ configure_tensor(t.get());
+ });
+ std::for_each(g.nodes().begin() + latest_nid, g.nodes().end(), [&assigned_target](std::unique_ptr<INode> &n)
+ {
+ if(n != nullptr)
+ {
+ n->set_assigned_target(assigned_target);
+ }
+ });
+ }
+ }
+ }
+}
+} // namespace graph
+} // namespace arm_compute
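
The mutator file above decomposes a convolution with num_groups == G into G parallel convolutions: the input splits along the channel axis, the weights along the batch axis, and the partial results are concatenated back along channels. The channel bookkeeping it depends on, as a self-contained sketch (hypothetical helper, not library code):

    #include <cassert>

    struct GroupShapes
    {
        unsigned int in_channels_per_group;
        unsigned int out_maps_per_group;
    };

    // C input channels and M output maps must both divide evenly by G for
    // the add_split_node calls above to produce equal slices.
    GroupShapes grouped_conv_shapes(unsigned int C, unsigned int M, unsigned int G)
    {
        assert(C % G == 0 && M % G == 0);
        return { C / G, M / G };
    }
    // e.g. C = 256, M = 512, G = 4: each group convolves 64 channels into
    // 128 maps; concatenating 4 x 128 maps restores the 512-channel output.
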
diff --git a/src/graph/mutators/InPlaceOperationMutator.cpp b/src/graph/mutators/InPlaceOperationMutator.cpp
index bd3f098..31921b3 100644
--- a/src/graph/mutators/InPlaceOperationMutator.cpp
+++ b/src/graph/mutators/InPlaceOperationMutator.cpp
@@ -50,11 +50,26 @@
// Check if parent has a single output if yes then force in place calculation else not
if((input_edge != nullptr) && (input_edge->producer() != nullptr) && (input_edge->producer()->output_edges().size() == 1))
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Switching to in-place computation for the node with ID : "
- << node->id() << " and name : " << node->name() << std::endl);
- // Update output
- auto tensor = input_edge->tensor();
- node->set_output_tensor(tensor->id(), 0);
+ // Get current and new output tensors
+ auto current_output_tensor = node->output(0);
+ auto new_output_tensor = input_edge->tensor();
+
+ ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr || new_output_tensor == nullptr);
+
+ // Prevent in-place operation if there is an accessor bound to the in-place tensor
+ if(new_output_tensor->accessor() == nullptr)
+ {
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Switching to in-place computation for the node with ID : "
+ << node->id() << " and name : " << node->name() << std::endl);
+ // Update accessor
+ new_output_tensor->set_accessor(current_output_tensor->extract_accessor());
+ // Update output
+ node->set_output_tensor(new_output_tensor->id(), 0);
+ }
+ else
+ {
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor\n");
+ }
}
}
}
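
The guard added above refuses the in-place rewrite when the shared tensor already has an observer, and otherwise migrates the output's accessor onto it so nothing stops firing. The ownership move is the essential part; a self-contained model of the extract/set pair, assuming accessors are uniquely owned as with ITensorAccessorUPtr:

    #include <memory>
    #include <utility>

    struct Accessor
    {
    };

    // Toy tensor with the same extract/set accessor surface as graph::Tensor.
    struct Tensor
    {
        std::unique_ptr<Accessor> acc;
        Accessor                 *accessor() const { return acc.get(); }
        std::unique_ptr<Accessor> extract_accessor() { return std::move(acc); }
        void                      set_accessor(std::unique_ptr<Accessor> a) { acc = std::move(a); }
    };

    // In-place is only legal when the input tensor is unobserved; the output's
    // accessor then moves onto the tensor that now serves both roles.
    bool try_make_in_place(Tensor &input, Tensor &output)
    {
        if(input.accessor() != nullptr)
        {
            return false; // two accessors would contend for one tensor
        }
        input.set_accessor(output.extract_accessor());
        return true;
    }
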
diff --git a/src/graph/mutators/NodeExecutionMethodMutator.cpp b/src/graph/mutators/NodeExecutionMethodMutator.cpp
new file mode 100644
index 0000000..b420121
--- /dev/null
+++ b/src/graph/mutators/NodeExecutionMethodMutator.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/mutators/NodeExecutionMethodMutator.h"
+
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/Utils.h"
+#include "arm_compute/graph/backends/BackendRegistry.h"
+#include "arm_compute/graph/nodes/Nodes.h"
+
+#include "arm_compute/core/utils/misc/Cast.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+namespace
+{
+/** Runs a default setter function on all nodes of a given type
+ *
+ * @tparam Setter Type of the setter function
+ *
+ * @param[in, out] g         Graph to extract the nodes from
+ * @param[in]      node_type Type of the nodes to process
+ * @param[in]      setter    Setter function to run on each node that fails backend validation
+ */
+template <typename Setter>
+void set_default_on_invalid_method(Graph &g, NodeType node_type, Setter &&setter)
+{
+ const std::vector<NodeID> &node_ids = g.nodes(node_type);
+ for(auto &node_id : node_ids)
+ {
+ INode *node = g.node(node_id);
+ if(node != nullptr)
+ {
+ // Validate node
+ backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(node->assigned_target());
+ Status status = backend.validate_node(*node);
+
+ // Set default execution method in case of failure
+ if(!bool(status))
+ {
+ setter(node);
+ }
+ }
+ }
+}
+} // namespace
+
+const char *NodeExecutionMethodMutator::name()
+{
+ return "NodeExecutionMethodMutator";
+}
+
+void NodeExecutionMethodMutator::mutate(Graph &g)
+{
+ // Convolution Layer
+ set_default_on_invalid_method(g, NodeType::ConvolutionLayer, [](INode * n)
+ {
+ ARM_COMPUTE_LOG_GRAPH_INFO("Switched ConvolutionLayer method of node with ID : "
+ << n->id() << " and Name: " << n->name() << std::endl);
+ auto *casted_node = arm_compute::utils::cast::polymorphic_downcast<ConvolutionLayerNode *>(n);
+ casted_node->set_convolution_method(ConvolutionMethod::Default);
+ });
+
+ // Depthwise Convolution Layer
+ set_default_on_invalid_method(g, NodeType::DepthwiseConvolutionLayer, [](INode * n)
+ {
+ ARM_COMPUTE_LOG_GRAPH_INFO("Switched Depthwise ConvolutionLayer method of node with ID : "
+ << n->id() << " and Name: " << n->name() << std::endl);
+ auto *casted_node = arm_compute::utils::cast::polymorphic_downcast<DepthwiseConvolutionLayerNode *>(n);
+ casted_node->set_depthwise_convolution_method(DepthwiseConvolutionMethod::Default);
+ });
+}
+} // namespace graph
+} // namespace arm_compute
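
set_default_on_invalid_method above is a small generic dispatcher: look up all nodes of one type through the new Graph::nodes(NodeType) index, validate each against its assigned backend, and hand only the failures to the caller's setter. Its shape, reduced to a self-contained toy (hypothetical types):

    #include <vector>

    struct Node
    {
        bool valid_on_backend = true;
    };

    // Run `setter` on every non-null node that fails validation, mirroring
    // the validate-then-fallback loop above.
    template <typename Setter>
    void set_default_on_invalid(std::vector<Node *> &nodes, Setter &&setter)
    {
        for(Node *n : nodes)
        {
            if(n != nullptr && !n->valid_on_backend)
            {
                setter(n);
            }
        }
    }
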
diff --git a/src/graph/mutators/NodeFusionMutator.cpp b/src/graph/mutators/NodeFusionMutator.cpp
index 2e893c2..82bfe25 100644
--- a/src/graph/mutators/NodeFusionMutator.cpp
+++ b/src/graph/mutators/NodeFusionMutator.cpp
@@ -25,10 +25,13 @@
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/Utils.h"
#include "arm_compute/graph/nodes/Nodes.h"
#include "arm_compute/core/utils/misc/Cast.h"
+#include <set>
+
namespace arm_compute
{
namespace graph
@@ -37,6 +40,9 @@
{
void fuse_batch_norm_with_activation(Graph &g)
{
+ // Supported activations when fusing
+ const std::set<Activation> supported_fused_activations = { Activation::RELU, Activation::BOUNDED_RELU, Activation::LU_BOUNDED_RELU };
+
// Not interested in the order of nodes
for(auto &node : g.nodes())
{
@@ -48,34 +54,47 @@
// Check if following node is an activation layer node
if((output_edge != nullptr) && (output_edge->consumer() != nullptr) && (output_edge->consumer()->type() == NodeType::ActivationLayer))
{
- ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing Batch Normalization node with ID : " << output_edge->producer_id()
- << " with Activation Layer node with ID : " << output_edge->consumer_id() << std::endl);
-
auto *bn_node = arm_compute::utils::cast::polymorphic_downcast<BatchNormalizationLayerNode *>(output_edge->producer());
auto *act_node = arm_compute::utils::cast::polymorphic_downcast<ActivationLayerNode *>(output_edge->consumer());
- // Get driving nodes of activation node
- std::vector<NodeIdxPair> act_driving_nodes;
- for(auto &act_output_edge_id : act_node->output_edges())
+ ARM_COMPUTE_ERROR_ON(act_node->output(0) == nullptr || bn_node->output(0) == nullptr);
+
+ // Check if activation is supported for fusion
+ if(supported_fused_activations.count(act_node->activation_info().activation()) == 0)
{
- auto act_output_edge = g.edge(act_output_edge_id);
- if(act_output_edge != nullptr)
- {
- ARM_COMPUTE_ERROR_ON(act_output_edge->consumer() == nullptr);
- act_driving_nodes.push_back({ act_output_edge->consumer_id(), act_output_edge->consumer_idx() });
- }
+ continue;
}
- // Set activation info to batch normalization
- bn_node->set_fused_activation(act_node->activation_info());
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Fusing Batch Normalization node with ID : " << output_edge->producer_id()
+ << " with Activation Layer node with ID : " << output_edge->consumer_id() << std::endl);
- // Remove activation node
- g.remove_node(act_node->id());
-
- // Update batch normalization node outputs
- for(auto &driving_node : act_driving_nodes)
+ // Prevent fusion if batch normalization node has an output accessor
+ if(bn_node->output(0)->accessor() == nullptr)
{
- g.add_connection(bn_node->id(), 0, driving_node.node_id, driving_node.index);
+ // Get driving nodes of activation node
+ std::vector<NodeIdxPair> act_driving_nodes = get_driving_nodes(*act_node);
+
+ // Set activation info to batch normalization
+ bn_node->set_fused_activation(act_node->activation_info());
+
+ // Extract activation node accessor if any
+ auto act_node_accessor = act_node->output(0)->extract_accessor();
+
+ // Remove activation node
+ g.remove_node(act_node->id());
+
+ // Update batch normalization node outputs
+ for(auto &driving_node : act_driving_nodes)
+ {
+ g.add_connection(bn_node->id(), 0, driving_node.node_id, driving_node.index);
+ }
+
+ // Update accessor to batch normalization node
+ bn_node->output(0)->set_accessor(std::move(act_node_accessor));
+ }
+ else
+ {
+ ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented fusion as batch normalization node has an output accessor\n");
}
}
}
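
Two new preconditions gate the fusion above: the activation must belong to a small whitelist the fused batch-normalization kernels implement, and the batch-norm output must not carry an accessor that the fused node would bypass (the activation's own accessor, by contrast, is preserved by moving it onto the batch-norm output). The whitelist test, as a self-contained mirror (re-declaring the Activation enumerators named in the diff):

    #include <set>

    enum class Activation { RELU, BOUNDED_RELU, LU_BOUNDED_RELU, TANH, LOGISTIC };

    // Only activations with fused batch-normalization kernel support may fold.
    bool fusable_activation(Activation act)
    {
        static const std::set<Activation> supported = {
            Activation::RELU, Activation::BOUNDED_RELU, Activation::LU_BOUNDED_RELU
        };
        return supported.count(act) != 0;
    }
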
diff --git a/src/graph/mutators/SplitLayerSubTensorMutator.cpp b/src/graph/mutators/SplitLayerSubTensorMutator.cpp
index 2a8c029..e21252a 100644
--- a/src/graph/mutators/SplitLayerSubTensorMutator.cpp
+++ b/src/graph/mutators/SplitLayerSubTensorMutator.cpp
@@ -25,6 +25,8 @@
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/Logger.h"
+#include "arm_compute/graph/Utils.h"
+#include "arm_compute/graph/algorithms/TopologicalSort.h"
#include "arm_compute/graph/backends/BackendRegistry.h"
#include "arm_compute/graph/nodes/SplitLayerNode.h"
@@ -42,10 +44,20 @@
void SplitLayerSubTensorMutator::mutate(Graph &g)
{
- // Should be in reverse order of execution
- for(auto &node : arm_compute::utils::iterable::reverse_iterate(g.nodes()))
+ // Early exit if no Split layers exist in graph
+ if(g.nodes(NodeType::SplitLayer).empty())
{
- if(node && node->type() == NodeType::SplitLayer && node->input(0) != nullptr)
+ return;
+ }
+
+ // Perform topological sort
+ std::vector<NodeID> topological_sorted_node_ids = dfs(g);
+
+ // Should be in reverse order of execution
+ for(auto &node_id : arm_compute::utils::iterable::reverse_iterate(topological_sorted_node_ids))
+ {
+ INode *node = g.node(node_id);
+ if(node != nullptr && node->type() == NodeType::SplitLayer && node->input(0) != nullptr)
{
            // Get input tensor
Tensor *input_tensor = node->input(0);
@@ -58,12 +70,12 @@
});
// Create subtensors
- if(is_valid && backends::BackendRegistry::get().find_backend(input_tensor->desc().target) != nullptr)
+ if(is_valid && is_target_supported(input_tensor->desc().target))
{
ARM_COMPUTE_LOG_GRAPH_VERBOSE("Using sub-tensors for the node with ID : "
<< node->id() << " and name : " << node->name() << std::endl);
- auto *split_node = arm_compute::utils::cast::polymorphic_downcast<SplitLayerNode *>(node.get());
+ auto *split_node = arm_compute::utils::cast::polymorphic_downcast<SplitLayerNode *>(node);
const unsigned int axis = split_node->axis();
const unsigned int num_splits = split_node->num_splits();
@@ -77,8 +89,8 @@
Coordinates coords;
std::tie(std::ignore, coords) = SplitLayerNode::compute_output_descriptor(input_tensor->desc(), num_splits, axis, i);
- backends::IDeviceBackend *backend = backends::BackendRegistry::get().find_backend(output_tensor->desc().target);
- std::unique_ptr<ITensorHandle> handle = backend->create_subtensor(input_tensor->handle(), output_shape, coords, extend_parent);
+ backends::IDeviceBackend &backend = backends::BackendRegistry::get().get_backend(output_tensor->desc().target);
+ std::unique_ptr<ITensorHandle> handle = backend.create_subtensor(input_tensor->handle(), output_shape, coords, extend_parent);
output_tensor->set_handle(std::move(handle));
}
}
diff --git a/src/graph/nodes/ChannelShuffleLayerNode.cpp b/src/graph/nodes/ChannelShuffleLayerNode.cpp
new file mode 100644
index 0000000..08fcce1
--- /dev/null
+++ b/src/graph/nodes/ChannelShuffleLayerNode.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/nodes/ChannelShuffleLayerNode.h"
+
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/INodeVisitor.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+ChannelShuffleLayerNode::ChannelShuffleLayerNode(unsigned int num_groups)
+ : _num_groups(num_groups)
+{
+ _input_edges.resize(1, EmptyEdgeID);
+ _outputs.resize(1, NullTensorID);
+}
+
+unsigned int ChannelShuffleLayerNode::num_groups() const
+{
+ return _num_groups;
+}
+
+bool ChannelShuffleLayerNode::forward_descriptors()
+{
+ if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ {
+ Tensor *dst = output(0);
+ ARM_COMPUTE_ERROR_ON(dst == nullptr);
+ dst->desc() = configure_output(0);
+ return true;
+ }
+ return false;
+}
+
+TensorDescriptor ChannelShuffleLayerNode::configure_output(size_t idx) const
+{
+ ARM_COMPUTE_UNUSED(idx);
+ ARM_COMPUTE_ERROR_ON(idx >= _outputs.size());
+
+ const Tensor *src = input(0);
+ ARM_COMPUTE_ERROR_ON(src == nullptr);
+
+ return src->desc();
+}
+
+NodeType ChannelShuffleLayerNode::type() const
+{
+ return NodeType::ChannelShuffleLayer;
+}
+
+void ChannelShuffleLayerNode::accept(INodeVisitor &v)
+{
+ v.visit(*this);
+}
+} // namespace graph
+} // namespace arm_compute
\ No newline at end of file
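
configure_output above forwards the input descriptor unchanged because a channel shuffle only permutes channel contents, never the shape. For reference, the permutation such a node denotes, written out as a self-contained sketch (the index formula is the standard ShuffleNet reshape-transpose-flatten, stated as an assumption rather than read from this file):

    // Source channel read when producing output channel `out_c`, for C
    // channels shuffled in G groups: reshape C -> (G, C/G), transpose, flatten.
    unsigned int shuffle_source_channel(unsigned int out_c,
                                        unsigned int num_channels,
                                        unsigned int num_groups)
    {
        const unsigned int channels_per_group = num_channels / num_groups;
        return (out_c % num_groups) * channels_per_group + out_c / num_groups;
    }
    // C = 6, G = 2: outputs read input channels in the order 0, 3, 1, 4, 2, 5.
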
diff --git a/src/graph/nodes/DepthConcatenateLayerNode.cpp b/src/graph/nodes/ConcatenateLayerNode.cpp
similarity index 61%
rename from src/graph/nodes/DepthConcatenateLayerNode.cpp
rename to src/graph/nodes/ConcatenateLayerNode.cpp
index 08cccc1..ade3f6e 100644
--- a/src/graph/nodes/DepthConcatenateLayerNode.cpp
+++ b/src/graph/nodes/ConcatenateLayerNode.cpp
@@ -21,58 +21,74 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/graph/nodes/DepthConcatenateLayerNode.h"
+#include "arm_compute/graph/nodes/ConcatenateLayerNode.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/graph/Graph.h"
#include "arm_compute/graph/INodeVisitor.h"
+#include "arm_compute/graph/Utils.h"
+
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
namespace arm_compute
{
namespace graph
{
-DepthConcatenateLayerNode::DepthConcatenateLayerNode(unsigned int total_nodes)
- : _total_nodes(total_nodes), _is_enabled(true)
+ConcatenateLayerNode::ConcatenateLayerNode(unsigned int total_nodes, DataLayoutDimension axis)
+ : _total_nodes(total_nodes), _axis(axis), _is_enabled(true)
{
_input_edges.resize(_total_nodes, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
}
-void DepthConcatenateLayerNode::set_enabled(bool is_enabled)
+void ConcatenateLayerNode::set_enabled(bool is_enabled)
{
_is_enabled = is_enabled;
}
-bool DepthConcatenateLayerNode::is_enabled() const
+bool ConcatenateLayerNode::is_enabled() const
{
return _is_enabled;
}
-TensorDescriptor DepthConcatenateLayerNode::compute_output_descriptor(const std::vector<TensorDescriptor> &input_descriptors)
+DataLayoutDimension ConcatenateLayerNode::concatenation_axis() const
+{
+ return _axis;
+}
+
+TensorDescriptor ConcatenateLayerNode::compute_output_descriptor(const std::vector<TensorDescriptor> &input_descriptors,
+ DataLayoutDimension axis)
{
ARM_COMPUTE_ERROR_ON(input_descriptors.size() == 0);
TensorDescriptor output_descriptor = input_descriptors[0];
+ const int axis_idx = get_dimension_idx(output_descriptor, axis);
- size_t max_x = 0;
- size_t max_y = 0;
- size_t depth = 0;
-
- for(const auto &input_descriptor : input_descriptors)
+ // Extract shapes
+ std::vector<const TensorShape *> shapes;
+ for(auto &input_descriptor : input_descriptors)
{
- max_x = std::max(input_descriptor.shape.x(), max_x);
- max_y = std::max(input_descriptor.shape.y(), max_y);
- depth += input_descriptor.shape.z();
+ shapes.emplace_back(&input_descriptor.shape);
}
- output_descriptor.shape.set(0, max_x);
- output_descriptor.shape.set(1, max_y);
- output_descriptor.shape.set(2, depth);
+ // Calculate output shape
+ if(axis_idx == 0)
+ {
+ output_descriptor.shape = arm_compute::misc::shape_calculator::calculate_width_concatenate_shape(shapes);
+ }
+ else if(axis_idx == 2)
+ {
+ output_descriptor.shape = arm_compute::misc::shape_calculator::calculate_depth_concatenate_shape(shapes);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unsupported concatenation axis!");
+ }
return output_descriptor;
}
-bool DepthConcatenateLayerNode::forward_descriptors()
+bool ConcatenateLayerNode::forward_descriptors()
{
if(_outputs[0] != NullTensorID)
{
@@ -84,7 +100,7 @@
return false;
}
-TensorDescriptor DepthConcatenateLayerNode::configure_output(size_t idx) const
+TensorDescriptor ConcatenateLayerNode::configure_output(size_t idx) const
{
ARM_COMPUTE_UNUSED(idx);
ARM_COMPUTE_ERROR_ON(idx >= _outputs.size());
@@ -106,18 +122,18 @@
ARM_COMPUTE_ERROR_ON(t == nullptr);
inputs_descriptors.push_back(t->desc());
}
- output_info = compute_output_descriptor(inputs_descriptors);
+ output_info = compute_output_descriptor(inputs_descriptors, _axis);
}
return output_info;
}
-NodeType DepthConcatenateLayerNode::type() const
+NodeType ConcatenateLayerNode::type() const
{
- return NodeType::DepthConcatenateLayer;
+ return NodeType::ConcatenateLayer;
}
-void DepthConcatenateLayerNode::accept(INodeVisitor &v)
+void ConcatenateLayerNode::accept(INodeVisitor &v)
{
v.visit(*this);
}
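
compute_output_descriptor above resolves the axis to a dimension index and dispatches: index 0 (width) and index 2 (depth) have shape calculators, anything else asserts. The depth rule is the one the removed loop implemented directly: x and y take the maximum across inputs while z accumulates. As a self-contained sketch:

    #include <algorithm>

    struct Shape
    {
        unsigned int x, y, z;
    };

    // Depth-concatenate shape rule from the removed max_x/max_y/depth loop:
    // spatial dims take the maximum, channels accumulate.
    Shape depth_concat(const Shape &a, const Shape &b)
    {
        return { std::max(a.x, b.x), std::max(a.y, b.y), a.z + b.z };
    }
    // Two [8, 8, 16] inputs concatenated on the channel axis give [8, 8, 32].
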
diff --git a/src/graph/nodes/ConvolutionLayerNode.cpp b/src/graph/nodes/ConvolutionLayerNode.cpp
index 6c31a6b..e9cb039 100644
--- a/src/graph/nodes/ConvolutionLayerNode.cpp
+++ b/src/graph/nodes/ConvolutionLayerNode.cpp
@@ -32,8 +32,12 @@
{
namespace graph
{
-ConvolutionLayerNode::ConvolutionLayerNode(PadStrideInfo info, ConvolutionMethod method, FastMathHint fast_math_hint, QuantizationInfo out_quant_info)
- : _info(std::move(info)), _method(method), _fast_math_hint(fast_math_hint), _out_quant_info(out_quant_info)
+ConvolutionLayerNode::ConvolutionLayerNode(PadStrideInfo info,
+ unsigned int num_groups,
+ ConvolutionMethod method,
+ FastMathHint fast_math_hint,
+ QuantizationInfo out_quant_info)
+ : _info(std::move(info)), _num_groups(num_groups), _method(method), _fast_math_hint(fast_math_hint), _out_quant_info(out_quant_info)
{
_input_edges.resize(3, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
@@ -64,6 +68,11 @@
return _info;
}
+unsigned int ConvolutionLayerNode::num_groups() const
+{
+ return _num_groups;
+}
+
TensorDescriptor ConvolutionLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
const TensorDescriptor &weights_descriptor,
const PadStrideInfo &info)
@@ -125,4 +134,4 @@
v.visit(*this);
}
} // namespace graph
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/graph/nodes/DeconvolutionLayerNode.cpp b/src/graph/nodes/DeconvolutionLayerNode.cpp
new file mode 100644
index 0000000..9329ae3
--- /dev/null
+++ b/src/graph/nodes/DeconvolutionLayerNode.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/nodes/DeconvolutionLayerNode.h"
+
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/INodeVisitor.h"
+#include "arm_compute/graph/Utils.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+DeconvolutionLayerNode::DeconvolutionLayerNode(PadStrideInfo info, Size2D inner_border)
+ : _info(std::move(info)), _inner_border(inner_border)
+{
+ _input_edges.resize(3, EmptyEdgeID);
+ _outputs.resize(1, NullTensorID);
+}
+
+PadStrideInfo DeconvolutionLayerNode::deconvolution_info() const
+{
+ return _info;
+}
+
+Size2D DeconvolutionLayerNode::inner_border() const
+{
+ return _inner_border;
+}
+
+TensorDescriptor DeconvolutionLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
+ const TensorDescriptor &weights_descriptor,
+ const PadStrideInfo &info,
+ const Size2D &inner_border)
+{
+ unsigned int output_width = 0;
+ unsigned int output_height = 0;
+
+ const unsigned int input_width = get_dimension_size(input_descriptor, DataLayoutDimension::WIDTH);
+ const unsigned int input_height = get_dimension_size(input_descriptor, DataLayoutDimension::HEIGHT);
+ const unsigned int kernel_width = get_dimension_size(weights_descriptor, DataLayoutDimension::WIDTH);
+ const unsigned int kernel_height = get_dimension_size(weights_descriptor, DataLayoutDimension::HEIGHT);
+
+ std::tie(output_width, output_height) = deconvolution_output_dimensions(input_width, input_height,
+ kernel_width, kernel_height,
+ info.pad().first, info.pad().second,
+ inner_border.x(), inner_border.y(),
+ info.stride().first, info.stride().second);
+
+ TensorDescriptor output_descriptor = input_descriptor;
+ output_descriptor.shape.set(get_dimension_idx(output_descriptor, DataLayoutDimension::WIDTH), output_width);
+ output_descriptor.shape.set(get_dimension_idx(output_descriptor, DataLayoutDimension::HEIGHT), output_height);
+ output_descriptor.shape.set(get_dimension_idx(output_descriptor, DataLayoutDimension::CHANNEL), weights_descriptor.shape[3]);
+
+ return output_descriptor;
+}
+
+bool DeconvolutionLayerNode::forward_descriptors()
+{
+ if((input_id(0) != NullTensorID) && (input_id(1) != NullTensorID) && (output_id(0) != NullTensorID))
+ {
+ Tensor *dst = output(0);
+ ARM_COMPUTE_ERROR_ON(dst == nullptr);
+ dst->desc() = configure_output(0);
+ return true;
+ }
+ return false;
+}
+
+TensorDescriptor DeconvolutionLayerNode::configure_output(size_t idx) const
+{
+ ARM_COMPUTE_UNUSED(idx);
+ const Tensor *src = input(0);
+ const Tensor *weights = input(1);
+
+ ARM_COMPUTE_ERROR_ON(src == nullptr || weights == nullptr);
+
+ TensorDescriptor output_info = compute_output_descriptor(src->desc(), weights->desc(), _info, _inner_border);
+ return output_info;
+}
+
+NodeType DeconvolutionLayerNode::type() const
+{
+ return NodeType::DeconvolutionLayer;
+}
+
+void DeconvolutionLayerNode::accept(INodeVisitor &v)
+{
+ v.visit(*this);
+}
+} // namespace graph
+} // namespace arm_compute
\ No newline at end of file
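
compute_output_descriptor above delegates the spatial arithmetic to deconvolution_output_dimensions. The relation that helper is assumed to encode, from the standard transposed-convolution arithmetic (an assumption about the library call, not a quotation of it):

    // out = stride * (in - 1) + inner_border + kernel - 2 * pad
    constexpr unsigned int deconv_out_dim(unsigned int in, unsigned int kernel,
                                          unsigned int pad, unsigned int stride,
                                          unsigned int inner_border)
    {
        return stride * (in - 1) + inner_border + kernel - 2 * pad;
    }
    // A 4-wide input with a 3x3 kernel, stride 2, pad 1: 2*3 + 0 + 3 - 2 = 7.
    static_assert(deconv_out_dim(4, 3, 1, 2, 0) == 7, "4 upsamples to 7");
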
diff --git a/src/graph/nodes/DummyNode.cpp b/src/graph/nodes/DummyNode.cpp
new file mode 100644
index 0000000..e641181
--- /dev/null
+++ b/src/graph/nodes/DummyNode.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/nodes/DummyNode.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/INodeVisitor.h"
+#include "arm_compute/graph/Tensor.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+DummyNode::DummyNode(TensorShape shape)
+ : _shape(shape)
+{
+ _input_edges.resize(1, EmptyEdgeID);
+ _outputs.resize(1, NullTensorID);
+}
+
+bool DummyNode::forward_descriptors()
+{
+ if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ {
+ Tensor *dst = output(0);
+ ARM_COMPUTE_ERROR_ON(dst == nullptr);
+ dst->desc() = configure_output(0);
+ return true;
+ }
+ return false;
+}
+
+TensorDescriptor DummyNode::configure_output(size_t idx) const
+{
+ ARM_COMPUTE_UNUSED(idx);
+ ARM_COMPUTE_ERROR_ON(idx >= _outputs.size());
+
+ const Tensor *src = input(0);
+ ARM_COMPUTE_ERROR_ON(src == nullptr);
+
+ TensorDescriptor output_desc = src->desc();
+ output_desc.shape = _shape;
+
+ return output_desc;
+}
+
+NodeType DummyNode::type() const
+{
+ return NodeType::Dummy;
+}
+
+void DummyNode::accept(INodeVisitor &v)
+{
+ v.visit(*this);
+}
+} // namespace graph
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/graph/nodes/FullyConnectedLayer.cpp b/src/graph/nodes/FullyConnectedLayer.cpp
index d94a785..6ea0292 100644
--- a/src/graph/nodes/FullyConnectedLayer.cpp
+++ b/src/graph/nodes/FullyConnectedLayer.cpp
@@ -31,15 +31,17 @@
{
namespace graph
{
-FullyConnectedLayerNode::FullyConnectedLayerNode(unsigned int num_outputs)
- : _num_outputs(num_outputs)
+FullyConnectedLayerNode::FullyConnectedLayerNode(unsigned int num_outputs, QuantizationInfo out_quant_info, FullyConnectedLayerInfo fc_info)
+ : _num_outputs(num_outputs), _out_quant_info(out_quant_info), _info(fc_info)
{
_input_edges.resize(3, EmptyEdgeID);
_outputs.resize(1, NullTensorID);
}
TensorDescriptor FullyConnectedLayerNode::compute_weights_descriptor(const TensorDescriptor &input_descriptor,
- unsigned int num_outputs)
+ unsigned int num_outputs,
+ FullyConnectedLayerInfo fc_info,
+ QuantizationInfo weights_quant_info)
{
unsigned int num_weights = 1;
unsigned int num_dimensions = input_descriptor.shape.num_dimensions();
@@ -56,11 +58,24 @@
TensorDescriptor weights_descriptor = input_descriptor;
weights_descriptor.shape = TensorShape(num_weights, num_outputs);
+    // If weights are supplied already transposed, use the transposed shape
+ if(!fc_info.transpose_weights)
+ {
+ weights_descriptor.shape = TensorShape(num_outputs, num_weights);
+ }
+
+ // Set quantization info if present
+ if(!weights_quant_info.empty())
+ {
+ weights_descriptor.quant_info = weights_quant_info;
+ }
+
return weights_descriptor;
}
TensorDescriptor FullyConnectedLayerNode::compute_output_descriptor(const TensorDescriptor &input_descriptor,
- unsigned int num_outputs)
+ unsigned int num_outputs,
+ QuantizationInfo out_quant_info)
{
// Note: Only 1D batch space is supported at the moment
unsigned int batches = input_descriptor.shape[1];
@@ -69,12 +84,24 @@
batches = input_descriptor.shape[3];
}
+ // Set descriptor shape
TensorDescriptor output_descriptor = input_descriptor;
output_descriptor.shape = TensorShape(num_outputs, batches);
+ // Set quantization info if present
+ if(!out_quant_info.empty())
+ {
+ output_descriptor.quant_info = out_quant_info;
+ }
+
return output_descriptor;
}
+FullyConnectedLayerInfo FullyConnectedLayerNode::info() const
+{
+ return _info;
+}
+
bool FullyConnectedLayerNode::forward_descriptors()
{
if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
@@ -93,7 +120,7 @@
const Tensor *src = input(0);
ARM_COMPUTE_ERROR_ON(src == nullptr);
- return compute_output_descriptor(src->desc(), _num_outputs);
+ return compute_output_descriptor(src->desc(), _num_outputs, _out_quant_info);
}
NodeType FullyConnectedLayerNode::type() const
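
compute_weights_descriptor above flattens the non-batch input dimensions into num_weights and shapes the weights as (num_weights, num_outputs), swapping the two when the weights arrive already transposed. Worked numbers as a self-contained sketch:

    #include <vector>

    // Flattened weight count for a fully connected layer fed by a feature
    // map with the given (non-batch) dimensions.
    unsigned int fc_num_weights(const std::vector<unsigned int> &input_dims)
    {
        unsigned int n = 1;
        for(unsigned int d : input_dims)
        {
            n *= d;
        }
        return n;
    }
    // fc_num_weights({ 7, 7, 512 }) == 25088; with 4096 outputs the weights
    // are [25088, 4096], or [4096, 25088] when supplied already transposed.
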
diff --git a/src/graph/nodes/PermuteLayerNode.cpp b/src/graph/nodes/PermuteLayerNode.cpp
new file mode 100644
index 0000000..042ec09
--- /dev/null
+++ b/src/graph/nodes/PermuteLayerNode.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/nodes/PermuteLayerNode.h"
+
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/INodeVisitor.h"
+
+#include "arm_compute/core/Helpers.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+PermuteLayerNode::PermuteLayerNode(PermutationVector perm, DataLayout layout)
+ : _perm(perm), _layout(layout)
+{
+ _input_edges.resize(1, EmptyEdgeID);
+ _outputs.resize(1, NullTensorID);
+}
+
+const PermutationVector &PermuteLayerNode::permutation_vector() const
+{
+ return _perm;
+}
+
+bool PermuteLayerNode::forward_descriptors()
+{
+ if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ {
+ Tensor *dst = output(0);
+ ARM_COMPUTE_ERROR_ON(dst == nullptr);
+ dst->desc() = configure_output(0);
+ return true;
+ }
+ return false;
+}
+
+TensorDescriptor PermuteLayerNode::configure_output(size_t idx) const
+{
+ ARM_COMPUTE_UNUSED(idx);
+ ARM_COMPUTE_ERROR_ON(idx >= _outputs.size());
+
+ const Tensor *src = input(0);
+ ARM_COMPUTE_ERROR_ON(src == nullptr);
+
+ TensorDescriptor output_desc = src->desc();
+ permute(output_desc.shape, _perm);
+ if(_layout != DataLayout::UNKNOWN)
+ {
+ output_desc.layout = _layout;
+ }
+
+ return output_desc;
+}
+
+NodeType PermuteLayerNode::type() const
+{
+ return NodeType::PermuteLayer;
+}
+
+void PermuteLayerNode::accept(INodeVisitor &v)
+{
+ v.visit(*this);
+}
+} // namespace graph
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/graph/nodes/ResizeLayerNode.cpp b/src/graph/nodes/ResizeLayerNode.cpp
new file mode 100644
index 0000000..a6aa7bf
--- /dev/null
+++ b/src/graph/nodes/ResizeLayerNode.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/nodes/ResizeLayerNode.h"
+
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/INodeVisitor.h"
+#include "arm_compute/graph/Utils.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+ResizeLayerNode::ResizeLayerNode(InterpolationPolicy policy, float scale_width, float scale_height)
+ : _policy(policy), _scale_width(scale_width), _scale_height(scale_height)
+{
+ _input_edges.resize(1, EmptyEdgeID);
+ _outputs.resize(1, NullTensorID);
+}
+
+InterpolationPolicy ResizeLayerNode::policy() const
+{
+ return _policy;
+}
+
+std::pair<float, float> ResizeLayerNode::scaling_factor() const
+{
+ return std::make_pair(_scale_width, _scale_height);
+}
+
+bool ResizeLayerNode::forward_descriptors()
+{
+ if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ {
+ Tensor *dst = output(0);
+ ARM_COMPUTE_ERROR_ON(dst == nullptr);
+ dst->desc() = configure_output(0);
+ return true;
+ }
+ return false;
+}
+
+TensorDescriptor ResizeLayerNode::configure_output(size_t idx) const
+{
+ ARM_COMPUTE_UNUSED(idx);
+ ARM_COMPUTE_ERROR_ON(idx >= _outputs.size());
+
+ const Tensor *src = input(0);
+ ARM_COMPUTE_ERROR_ON(src == nullptr);
+
+ TensorDescriptor output_desc = src->desc();
+ size_t width_idx = get_dimension_idx(output_desc, DataLayoutDimension::WIDTH);
+ size_t height_idx = get_dimension_idx(output_desc, DataLayoutDimension::HEIGHT);
+ output_desc.shape.set(width_idx, static_cast<int>(output_desc.shape[width_idx] * _scale_width));
+ output_desc.shape.set(height_idx, static_cast<int>(output_desc.shape[height_idx] * _scale_height));
+
+ return output_desc;
+}
+
+NodeType ResizeLayerNode::type() const
+{
+ return NodeType::ResizeLayer;
+}
+
+void ResizeLayerNode::accept(INodeVisitor &v)
+{
+ v.visit(*this);
+}
+} // namespace graph
+} // namespace arm_compute
\ No newline at end of file
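
configure_output above scales the width and height entries by the stored factors, truncating through static_cast<int> rather than rounding. The rule, as a self-contained sketch:

    // Truncating scale rule used above: out = int(in * scale).
    unsigned int scaled_dim(unsigned int in, float scale)
    {
        return static_cast<unsigned int>(static_cast<int>(in * scale));
    }
    // scaled_dim(32, 0.5f) == 16, while scaled_dim(10, 0.33f) == 3 -- the
    // fraction is truncated, not rounded.
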
diff --git a/src/graph/printers/DotGraphPrinter.cpp b/src/graph/printers/DotGraphPrinter.cpp
index 61cf423..ef156ea 100644
--- a/src/graph/printers/DotGraphPrinter.cpp
+++ b/src/graph/printers/DotGraphPrinter.cpp
@@ -47,6 +47,15 @@
_info = ss.str();
}
+void DotGraphVisitor::visit(ConcatenateLayerNode &n)
+{
+ std::stringstream ss;
+ ss << "Enabled: " << n.is_enabled();
+ ss << R"( \n )";
+ ss << "Axis: " << n.concatenation_axis();
+ _info = ss.str();
+}
+
void DotGraphVisitor::visit(ConvolutionLayerNode &n)
{
std::stringstream ss;
@@ -54,13 +63,6 @@
_info = ss.str();
}
-void DotGraphVisitor::visit(DepthConcatenateLayerNode &n)
-{
- std::stringstream ss;
- ss << "Enabled: " << n.is_enabled();
- _info = ss.str();
-}
-
void DotGraphVisitor::visit(DepthwiseConvolutionLayerNode &n)
{
std::stringstream ss;