arm_compute v18.02
Change-Id: I7207aa488e5470f235f39b6c188b4678dc38d1a6
diff --git a/src/graph/Graph.cpp b/src/graph/Graph.cpp
index ac5316f..b6c6822 100644
--- a/src/graph/Graph.cpp
+++ b/src/graph/Graph.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -62,6 +62,7 @@
std::unique_ptr<INode> _current_node{ nullptr };
ITensorObject *_current_output{ nullptr };
bool _info_enabled{ false };
+ CLTuner _tuner{};
private:
ITensorObject *_current_input{ nullptr };
@@ -76,10 +77,22 @@
Graph::Graph()
: _pimpl{ new Private() }
{
+ graph_init();
+}
+
+void Graph::graph_init(const bool use_cl_tuner)
+{
// Check if OpenCL is available and initialize the scheduler
if(opencl_is_available())
{
- arm_compute::CLScheduler::get().default_init();
+ if(use_cl_tuner)
+ {
+ arm_compute::CLScheduler::get().default_init(&_pimpl->_tuner);
+ }
+ else
+ {
+ arm_compute::CLScheduler::get().default_init();
+ }
}
}
@@ -119,6 +132,11 @@
_previous_hints = _current_hints; // For the first node just assume the previous node was of the same type as this one
}
+ if(_current_node->supports_in_place())
+ {
+ _current_output = _current_input;
+ }
+
//Automatic output configuration ?
if(_current_output == nullptr)
{
@@ -140,8 +158,12 @@
_ctx.hints() = _current_hints;
std::unique_ptr<arm_compute::IFunction> func = _current_node->instantiate_node(_ctx, _current_input, _current_output);
- // Allocate current input
- _current_input->allocate();
+ // If the operation is done in-place, do not allocate or it will prevent following layers from performing the configuration
+ if(!_current_node->supports_in_place())
+ {
+ // Allocate current input
+ _current_input->allocate();
+ }
// Map input if needed
if(_current_input->target() == TargetHint::OPENCL)
@@ -215,11 +237,25 @@
_pimpl->_graph_output->allocate();
}
}
+
bool Graph::opencl_is_available()
{
return arm_compute::opencl_is_available();
}
+arm_compute::GPUTarget Graph::gpu_target()
+{
+ // Check if OpenCL is available before returning the GPU target
+ if(opencl_is_available())
+ {
+ return arm_compute::CLScheduler::get().target();
+ }
+ else
+ {
+ return GPUTarget::MIDGARD;
+ }
+}
+
void Graph::set_temp(TensorInfo &&tmp)
{
ARM_COMPUTE_ERROR_ON(_pimpl->_graph_input == nullptr);
diff --git a/src/graph/INode.cpp b/src/graph/INode.cpp
index 582f936..c753f66 100644
--- a/src/graph/INode.cpp
+++ b/src/graph/INode.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,6 +39,14 @@
ARM_COMPUTE_ERROR_ON(target_hint == TargetHint::OPENCL && !opencl_is_available());
return target_hint;
}
+bool INode::supports_in_place() const
+{
+ return _supports_in_place;
+}
+void INode::set_supports_in_place(bool value)
+{
+ _supports_in_place = value;
+}
GraphHints INode::node_override_hints(GraphHints hints) const
{
TargetHint target_hint = hints.target_hint();
diff --git a/src/graph/SubGraph.cpp b/src/graph/SubGraph.cpp
index 8ba2af6..4065e1d 100644
--- a/src/graph/SubGraph.cpp
+++ b/src/graph/SubGraph.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -66,6 +66,10 @@
}
graph->add_tensor_object(std::move(_input));
+ // Make sure first and last nodes of the subgraph always do operations out-of-place
+ _nodes.front()->set_supports_in_place(false);
+ _nodes.back()->set_supports_in_place(false);
+
// Construct nodes
for(auto &node : _nodes)
{
diff --git a/src/graph/nodes/ActivationLayer.cpp b/src/graph/nodes/ActivationLayer.cpp
index 54f30ef..546c42a 100644
--- a/src/graph/nodes/ActivationLayer.cpp
+++ b/src/graph/nodes/ActivationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -33,6 +33,7 @@
ActivationLayer::ActivationLayer(const ActivationLayerInfo activation_info)
: _activation_info(activation_info)
{
+ set_supports_in_place(true);
}
std::unique_ptr<arm_compute::IFunction> ActivationLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output)
diff --git a/src/graph/nodes/BatchNormalizationLayer.cpp b/src/graph/nodes/BatchNormalizationLayer.cpp
index 7851aa5..24287ac 100644
--- a/src/graph/nodes/BatchNormalizationLayer.cpp
+++ b/src/graph/nodes/BatchNormalizationLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -77,6 +77,7 @@
node_ctx.add_input(_gamma.tensor());
node_ctx.add_output(out);
node_ctx.add_parameter<float>("epsilon", _epsilon);
+ node_ctx.add_parameter<ActivationLayerInfo>("act_info", _act_info);
// Configure operation
auto func = OperationRegistry::get().find_operation(OperationType::BatchNormalizationLayer, _target_hint)->configure(node_ctx);
diff --git a/src/graph/nodes/DepthwiseConvolutionLayer.cpp b/src/graph/nodes/DepthwiseConvolutionLayer.cpp
index 1209d03..e5101cc 100644
--- a/src/graph/nodes/DepthwiseConvolutionLayer.cpp
+++ b/src/graph/nodes/DepthwiseConvolutionLayer.cpp
@@ -40,10 +40,8 @@
if(_weights.tensor() == nullptr)
{
- TensorShape shape = in->info()->tensor_shape();
- shape.set(Window::DimX, _conv_width);
- shape.set(Window::DimY, _conv_height);
- TensorInfo info = TensorInfo(TensorShape(shape), in->info()->num_channels(), in->info()->data_type(), in->info()->fixed_point_position());
+ TensorShape weights_shape(_conv_width, _conv_height, input->tensor()->info()->tensor_shape().z());
+ TensorInfo info = TensorInfo(TensorShape(weights_shape), in->info()->num_channels(), in->info()->data_type(), in->info()->fixed_point_position());
info.set_quantization_info(_quant_info);
_weights.set_info(std::move(info));
}
diff --git a/src/graph/nodes/FullyConnectedLayer.cpp b/src/graph/nodes/FullyConnectedLayer.cpp
index 219e0f9..3742150 100644
--- a/src/graph/nodes/FullyConnectedLayer.cpp
+++ b/src/graph/nodes/FullyConnectedLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
diff --git a/src/graph/nodes/ResidualLayer.cpp b/src/graph/nodes/ResidualLayer.cpp
new file mode 100644
index 0000000..87404f9
--- /dev/null
+++ b/src/graph/nodes/ResidualLayer.cpp
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/nodes/ResidualLayer.h"
+
+#include "arm_compute/graph/Error.h"
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/NodeContext.h"
+#include "arm_compute/graph/OperationRegistry.h"
+#include "arm_compute/graph/SubGraph.h"
+#include "arm_compute/graph/Tensor.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "support/ToolchainSupport.h"
+#include "utils/Utils.h"
+
+#include <memory>
+#include <tuple>
+#include <vector>
+
+using namespace arm_compute::graph;
+
+/** Residual function */
+class ResidualFunction final : public arm_compute::IFunction
+{
+public:
+ /** Constructor: stores the graph context and the output tensor object of the residual addition */
+ ResidualFunction(GraphContext &ctx, ITensorObject *output)
+ : _ctx(ctx), _input(nullptr), _output(output), _func(nullptr), _graphs(), _graph_outputs()
+ {
+ }
+
+ /** Prevent instances from being copy constructed */
+ ResidualFunction(const ResidualFunction &) = delete;
+ /** Prevent instances from being copy assigned */
+ const ResidualFunction &operator=(const ResidualFunction &) = delete;
+ /** Prevent instances from being move constructed */
+ ResidualFunction(ResidualFunction &&) = delete;
+ /** Prevent instances from being move assigned */
+ ResidualFunction &operator=(ResidualFunction &&) = delete;
+ /** Default destructor */
+ ~ResidualFunction() override = default;
+
+ /** Set the input (when using only one sub graph)
+ *
+ * @param[in] input Input to set
+ */
+ void set_input(std::unique_ptr<ITensorObject> input)
+ {
+ _input = std::move(input);
+ }
+
+ /** Registers graph to be executed by the residual function
+ *
+ * @param[in] graph Graph to register
+ * @param[in] output Output to register
+ */
+ void register_graph(std::unique_ptr<Graph> graph, std::unique_ptr<ITensorObject> output)
+ {
+ _graphs.push_back(std::move(graph));
+ _graph_outputs.push_back(std::move(output));
+ }
+
+ /** Configure the function */
+ void configure()
+ {
+ ARM_COMPUTE_ERROR_ON(_graphs.size() < 1 || _graphs.size() > 2);
+ TargetHint target_hint = _ctx.hints().target_hint();
+
+ // Create node context
+ NodeContext node_ctx(OperationType::ArithmeticAddition);
+ node_ctx.set_target(target_hint);
+
+ if(_graphs.size() == 1)
+ {
+ arm_compute::ITensor *in = _input->tensor();
+ node_ctx.add_input(in);
+ }
+
+ for(auto &o : _graph_outputs)
+ {
+ arm_compute::ITensor *in = o->tensor();
+ node_ctx.add_input(in);
+ }
+
+ arm_compute::ITensor *out = _output->tensor();
+ auto_init_if_empty(*out->info(), *_graph_outputs[0]->tensor()->info());
+ node_ctx.add_output(out);
+
+ _func = OperationRegistry::get().find_operation(OperationType::ArithmeticAddition, target_hint)->configure(node_ctx);
+
+ for(auto &o : _graph_outputs)
+ {
+ o->allocate();
+ }
+ }
+
+ // Inherited methods overridden:
+ void run() override
+ {
+ ARM_COMPUTE_ERROR_ON(_graphs.size() < 1 || _graphs.size() > 2);
+
+ for(auto &g : _graphs)
+ {
+ ARM_COMPUTE_ERROR_ON(g.get() == nullptr);
+ g->run();
+ }
+
+ _func->run();
+ }
+
+private:
+ GraphContext _ctx;
+ std::unique_ptr<ITensorObject> _input;
+ ITensorObject *_output;
+ std::unique_ptr<arm_compute::IFunction> _func;
+ std::vector<std::unique_ptr<Graph>> _graphs;
+ std::vector<std::unique_ptr<ITensorObject>> _graph_outputs;
+};
+
+std::unique_ptr<arm_compute::IFunction> ResidualLayer::instantiate_node(GraphContext &ctx, ITensorObject *input, ITensorObject *output)
+{
+ ARM_COMPUTE_ERROR_ON_UNALLOCATED_TENSOR_OBJECT(input, output);
+ ARM_COMPUTE_ERROR_ON(dynamic_cast<Tensor *>(input) == nullptr);
+ ARM_COMPUTE_ERROR_ON(dynamic_cast<Tensor *>(output) == nullptr);
+
+ // Create residual function
+ auto func = arm_compute::support::cpp14::make_unique<ResidualFunction>(ctx, output);
+
+ if(_sub_graphs.size() == 1)
+ {
+ std::unique_ptr<ITensorObject> original_in;
+ original_in = arm_compute::support::cpp14::make_unique<SubTensor>(*dynamic_cast<Tensor *>(input),
+ input->tensor()->info()->tensor_shape(),
+ Coordinates());
+ func->set_input(std::move(original_in));
+ }
+
+ // Construct all sub-graphs given the input/output
+ for(auto &sg : _sub_graphs)
+ {
+ ARM_COMPUTE_ERROR_ON(sg.get() == nullptr);
+
+ // IO buffers
+ std::unique_ptr<ITensorObject> in;
+ std::unique_ptr<ITensorObject> out;
+ std::unique_ptr<ITensorObject> func_in;
+
+ // Create input sub-tensor
+ if(!sg->has_input())
+ {
+ in = arm_compute::support::cpp14::make_unique<SubTensor>(*dynamic_cast<Tensor *>(input),
+ input->tensor()->info()->tensor_shape(),
+ Coordinates());
+ }
+
+ // Create output sub-tensor
+ if(!sg->has_output())
+ {
+ ITensorInfo *info = input->tensor()->info();
+ func_in = arm_compute::support::cpp14::make_unique<Tensor>(TensorInfo(info->num_channels(), info->data_type(), info->fixed_point_position()));
+ func_in->set_target(ctx.hints().target_hint());
+ out = arm_compute::support::cpp14::make_unique<SubTensor>(func_in->tensor(),
+ TensorShape(),
+ Coordinates(0, 0, 0),
+ func_in->target(),
+ true);
+ }
+
+ // Construct sub_graph
+ auto g = sg->construct(ctx, std::move(in), std::move(out));
+
+ // Register graph to function
+ func->register_graph(std::move(g), std::move(func_in));
+ }
+
+ func->configure();
+
+ return std::move(func);
+}
diff --git a/src/graph/operations/CLSimpleOperations.cpp b/src/graph/operations/CLSimpleOperations.cpp
index 61315e7..fe56122 100644
--- a/src/graph/operations/CLSimpleOperations.cpp
+++ b/src/graph/operations/CLSimpleOperations.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -66,6 +66,34 @@
return std::move(activation);
}
+/* Arithmetic addition */
+REGISTER_SIMPLE_OPERATION(CLArithmeticAdditionOperation, OPENCL, OperationType::ArithmeticAddition)
+{
+ ARM_COMPUTE_ERROR_ON(ctx.num_inputs() != 2);
+ ARM_COMPUTE_ERROR_ON(ctx.num_outputs() != 1);
+ ARM_COMPUTE_ERROR_ON(dynamic_cast<arm_compute::ICLTensor *>(ctx.input(0)) == nullptr);
+ ARM_COMPUTE_ERROR_ON(dynamic_cast<arm_compute::ICLTensor *>(ctx.input(1)) == nullptr);
+ ARM_COMPUTE_ERROR_ON(dynamic_cast<arm_compute::ICLTensor *>(ctx.output(0)) == nullptr);
+
+ // Extract IO and info
+ auto *in1 = dynamic_cast<arm_compute::ICLTensor *>(ctx.input(0));
+ auto *in2 = dynamic_cast<arm_compute::ICLTensor *>(ctx.input(1));
+ auto *out = dynamic_cast<arm_compute::ICLTensor *>(ctx.output(0));
+
+ auto addition = arm_compute::support::cpp14::make_unique<arm_compute::CLArithmeticAddition>();
+ addition->configure(in1, in2, out, ConvertPolicy::SATURATE);
+
+ // Log info
+ ARM_COMPUTE_LOG_GRAPH_INFO("Instantiating CLArithmeticAddition"
+ << " Data Type: " << in1->info()->data_type()
+ << " Input 1 shape: " << in1->info()->tensor_shape()
+ << " Input 2 shape: " << in2->info()->tensor_shape()
+ << " Output shape: " << out->info()->tensor_shape()
+ << std::endl);
+
+ return std::move(addition);
+}
+
/* Batch Normalization Layer */
REGISTER_SIMPLE_OPERATION(CLBatchNormalizationLayerOperation, OPENCL, OperationType::BatchNormalizationLayer)
{
@@ -79,17 +107,18 @@
ARM_COMPUTE_ERROR_ON(dynamic_cast<arm_compute::ICLTensor *>(ctx.output(0)) == nullptr);
// Extract IO and info
- auto *in = dynamic_cast<arm_compute::ICLTensor *>(ctx.input(0));
- auto *mean = dynamic_cast<arm_compute::ICLTensor *>(ctx.input(1));
- auto *var = dynamic_cast<arm_compute::ICLTensor *>(ctx.input(2));
- auto *beta = dynamic_cast<arm_compute::ICLTensor *>(ctx.input(3));
- auto *gamma = dynamic_cast<arm_compute::ICLTensor *>(ctx.input(4));
- auto *out = dynamic_cast<arm_compute::ICLTensor *>(ctx.output(0));
- const auto epsilon = ctx.parameter<float>("epsilon");
+ auto *in = dynamic_cast<arm_compute::ICLTensor *>(ctx.input(0));
+ auto *mean = dynamic_cast<arm_compute::ICLTensor *>(ctx.input(1));
+ auto *var = dynamic_cast<arm_compute::ICLTensor *>(ctx.input(2));
+ auto *beta = dynamic_cast<arm_compute::ICLTensor *>(ctx.input(3));
+ auto *gamma = dynamic_cast<arm_compute::ICLTensor *>(ctx.input(4));
+ auto *out = dynamic_cast<arm_compute::ICLTensor *>(ctx.output(0));
+ const auto epsilon = ctx.parameter<float>("epsilon");
+ const auto act_info = ctx.parameter<ActivationLayerInfo>("act_info");
// Create and configure function
auto batch_norm = arm_compute::support::cpp14::make_unique<arm_compute::CLBatchNormalizationLayer>();
- batch_norm->configure(in, out, mean, var, beta, gamma, epsilon);
+ batch_norm->configure(in, out, mean, var, beta, gamma, epsilon, act_info);
// Log info
ARM_COMPUTE_LOG_GRAPH_INFO("Instantiating CLBatchNormalizationLayer"
@@ -101,6 +130,9 @@
<< " Beta shape: " << beta->info()->tensor_shape()
<< " Gamma shape: " << gamma->info()->tensor_shape()
<< " Epsilon: " << epsilon
+ << " Activation function: " << act_info.activation()
+ << " a: " << act_info.a()
+ << " b: " << act_info.b()
<< std::endl);
return std::move(batch_norm);
@@ -460,4 +492,4 @@
<< std::endl);
return std::move(smx);
-}
\ No newline at end of file
+}
diff --git a/src/graph/operations/NESimpleOperations.cpp b/src/graph/operations/NESimpleOperations.cpp
index 49adbe9..4154b9a 100644
--- a/src/graph/operations/NESimpleOperations.cpp
+++ b/src/graph/operations/NESimpleOperations.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -66,6 +66,34 @@
return std::move(activation);
}
+/* Arithmetic addition */
+REGISTER_SIMPLE_OPERATION(NEArithmeticAdditionOperation, NEON, OperationType::ArithmeticAddition)
+{
+ ARM_COMPUTE_ERROR_ON(ctx.num_inputs() != 2);
+ ARM_COMPUTE_ERROR_ON(ctx.num_outputs() != 1);
+ ARM_COMPUTE_ERROR_ON(dynamic_cast<arm_compute::ITensor *>(ctx.input(0)) == nullptr);
+ ARM_COMPUTE_ERROR_ON(dynamic_cast<arm_compute::ITensor *>(ctx.input(1)) == nullptr);
+ ARM_COMPUTE_ERROR_ON(dynamic_cast<arm_compute::ITensor *>(ctx.output(0)) == nullptr);
+
+ // Extract IO and info
+ auto *in1 = dynamic_cast<arm_compute::ITensor *>(ctx.input(0));
+ auto *in2 = dynamic_cast<arm_compute::ITensor *>(ctx.input(1));
+ auto *out = dynamic_cast<arm_compute::ITensor *>(ctx.output(0));
+
+ auto addition = arm_compute::support::cpp14::make_unique<arm_compute::NEArithmeticAddition>();
+ addition->configure(in1, in2, out, ConvertPolicy::SATURATE);
+
+ // Log info
+ ARM_COMPUTE_LOG_GRAPH_INFO("Instantiating NEArithmeticAddition"
+ << " Data Type: " << in1->info()->data_type()
+ << " Input 1 shape: " << in1->info()->tensor_shape()
+ << " Input 2 shape: " << in2->info()->tensor_shape()
+ << " Output shape: " << out->info()->tensor_shape()
+ << std::endl);
+
+ return std::move(addition);
+}
+
/* Batch Normalization Layer */
REGISTER_SIMPLE_OPERATION(NEBatchNormalizationLayerOperation, NEON, OperationType::BatchNormalizationLayer)
{
@@ -79,17 +107,18 @@
ARM_COMPUTE_ERROR_ON(dynamic_cast<arm_compute::ITensor *>(ctx.output(0)) == nullptr);
// Extract IO and info
- auto *in = dynamic_cast<arm_compute::ITensor *>(ctx.input(0));
- auto *mean = dynamic_cast<arm_compute::ITensor *>(ctx.input(1));
- auto *var = dynamic_cast<arm_compute::ITensor *>(ctx.input(2));
- auto *beta = dynamic_cast<arm_compute::ITensor *>(ctx.input(3));
- auto *gamma = dynamic_cast<arm_compute::ITensor *>(ctx.input(4));
- auto *out = dynamic_cast<arm_compute::ITensor *>(ctx.output(0));
- const auto epsilon = ctx.parameter<float>("epsilon");
+ auto *in = dynamic_cast<arm_compute::ITensor *>(ctx.input(0));
+ auto *mean = dynamic_cast<arm_compute::ITensor *>(ctx.input(1));
+ auto *var = dynamic_cast<arm_compute::ITensor *>(ctx.input(2));
+ auto *beta = dynamic_cast<arm_compute::ITensor *>(ctx.input(3));
+ auto *gamma = dynamic_cast<arm_compute::ITensor *>(ctx.input(4));
+ auto *out = dynamic_cast<arm_compute::ITensor *>(ctx.output(0));
+ const auto epsilon = ctx.parameter<float>("epsilon");
+ const auto act_info = ctx.parameter<ActivationLayerInfo>("act_info");
// Create and configure function
auto batch_norm = arm_compute::support::cpp14::make_unique<arm_compute::NEBatchNormalizationLayer>();
- batch_norm->configure(in, out, mean, var, beta, gamma, epsilon);
+ batch_norm->configure(in, out, mean, var, beta, gamma, epsilon, act_info);
// Log info
ARM_COMPUTE_LOG_GRAPH_INFO("Instantiating NEBatchNormalizationLayer"
@@ -101,6 +130,9 @@
<< " Beta shape: " << beta->info()->tensor_shape()
<< " Gamma shape: " << gamma->info()->tensor_shape()
<< " Epsilon: " << epsilon
+ << " Activation function: " << act_info.activation()
+ << " a: " << act_info.a()
+ << " b: " << act_info.b()
<< std::endl);
return std::move(batch_norm);
@@ -149,12 +181,23 @@
auto *biases = ctx.num_inputs() == 3 ? dynamic_cast<arm_compute::ITensor *>(ctx.input(2)) : nullptr;
auto *out = dynamic_cast<arm_compute::ITensor *>(ctx.output(0));
const auto conv_info = ctx.parameter<PadStrideInfo>("ConvolutionInfo");
+ const auto opt3x3 = ctx.parameter<bool>("Optimized3x3");
// Create and configure function
std::unique_ptr<arm_compute::IFunction> func;
- auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::NEDepthwiseConvolutionLayer>();
- depwthwise_conv->configure(in, weights, biases, out, conv_info);
- func = std::move(depwthwise_conv);
+ bool run_3x3_opt = opt3x3 && weights->info()->dimension(0) == 3;
+ if(run_3x3_opt)
+ {
+ auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::NEDepthwiseConvolutionLayer3x3>();
+ depwthwise_conv->configure(in, weights, biases, out, conv_info);
+ func = std::move(depwthwise_conv);
+ }
+ else
+ {
+ auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::NEDepthwiseConvolutionLayer>();
+ depwthwise_conv->configure(in, weights, biases, out, conv_info);
+ func = std::move(depwthwise_conv);
+ }
// Log info
ARM_COMPUTE_LOG_GRAPH_INFO("Instantiating NEDepthwiseConvolutionLayer"