arm_compute v19.11: src/graph changes
diff --git a/src/graph/Graph.cpp b/src/graph/Graph.cpp
index 9d437b1..edc8d6b 100644
--- a/src/graph/Graph.cpp
+++ b/src/graph/Graph.cpp
@@ -68,7 +68,7 @@
EdgeID Graph::add_connection(NodeID source, size_t source_idx, NodeID sink, size_t sink_idx)
{
- std::lock_guard<arm_compute::Mutex> lock(_mtx);
+ arm_compute::lock_guard<arm_compute::Mutex> lock(_mtx);
// Check if node index is valid, if node exists and finally if the connection index is valid
ARM_COMPUTE_ERROR_ON((source >= _nodes.size()) || (_nodes[source] == nullptr) || (source_idx >= _nodes[source]->num_outputs()));
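The one-line change above swaps std::lock_guard for the library's own alias, which lets locking compile away on single-threaded and bare-metal builds. A minimal sketch of that alias pattern, assuming a NO_MULTITHREADING-style configuration flag (the exact macro name is an assumption, not taken from this patch):

    #include <mutex>

    #ifndef NO_MULTITHREADING
    using Mutex = std::mutex;                  // real mutex on threaded builds
    template <typename M>
    using lock_guard = std::lock_guard<M>;     // same RAII semantics as std
    #else
    struct Mutex                               // no-op stand-in for bare metal
    {
        void lock() {}
        void unlock() {}
    };
    template <typename M>
    struct lock_guard                          // RAII shell that does nothing
    {
        explicit lock_guard(M &) {}
    };
    #endif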
diff --git a/src/graph/GraphBuilder.cpp b/src/graph/GraphBuilder.cpp
index 228f2d2..89c8c20 100644
--- a/src/graph/GraphBuilder.cpp
+++ b/src/graph/GraphBuilder.cpp
@@ -376,6 +376,11 @@
return conv_nid;
}
+NodeID GraphBuilder::add_dequantization_node(Graph &g, NodeParams params, NodeIdxPair input)
+{
+ return create_simple_single_input_output_node<DequantizationLayerNode>(g, params, input);
+}
+
NodeID GraphBuilder::add_detection_output_node(Graph &g, NodeParams params, NodeIdxPair input_loc, NodeIdxPair input_conf, NodeIdxPair input_priorbox, const DetectionOutputLayerInfo &detect_info)
{
check_nodeidx_pair(input_loc, g);
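A hedged sketch of how the new builder entry point might be driven when assembling a graph by hand; only the add_dequantization_node signature comes from this patch, while the producer node and the target choice are assumptions:

    #include "arm_compute/graph/Graph.h"
    #include "arm_compute/graph/GraphBuilder.h"

    using namespace arm_compute::graph;

    // Assumed: `g` already holds a quantized producer node `prev_id`.
    NodeID append_dequantize(Graph &g, NodeID prev_id)
    {
        NodeParams  params{ "dequant", Target::NEON }; // node name + target hint
        NodeIdxPair input{ prev_id, 0 };               // output 0 of the producer
        return GraphBuilder::add_dequantization_node(g, params, input);
    }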
diff --git a/src/graph/GraphContext.cpp b/src/graph/GraphContext.cpp
index 037b40b..4d97807 100644
--- a/src/graph/GraphContext.cpp
+++ b/src/graph/GraphContext.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,13 +32,14 @@
namespace graph
{
GraphContext::GraphContext()
- : _config(), _memory_managers()
+ : _config(), _memory_managers(), _weights_managers()
{
}
GraphContext::~GraphContext()
{
_memory_managers.clear();
+ _weights_managers.clear();
release_default_graph_context(*this);
}
@@ -74,6 +75,30 @@
return _memory_managers;
}
+bool GraphContext::insert_weights_management_ctx(WeightsManagerContext &&weights_managers)
+{
+ Target target = weights_managers.target;
+
+ if(_weights_managers.find(target) != std::end(_weights_managers))
+ {
+ return false;
+ }
+
+ _weights_managers[target] = std::move(weights_managers);
+
+ return true;
+}
+
+WeightsManagerContext *GraphContext::weights_management_ctx(Target target)
+{
+ return (_weights_managers.find(target) != std::end(_weights_managers)) ? &_weights_managers[target] : nullptr;
+}
+
+std::map<Target, WeightsManagerContext> &GraphContext::weights_managers()
+{
+ return _weights_managers;
+}
+
void GraphContext::finalize()
{
const size_t num_pools = 1;
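The new accessors make per-target lookup a one-liner; a small convenience helper built only on what this patch adds (the helper name is illustrative, not necessarily something the library ships):

    #include "arm_compute/graph/GraphContext.h"

    using namespace arm_compute::graph;

    // Return the weights manager registered for `target`, or nullptr if none.
    std::shared_ptr<arm_compute::IWeightsManager> lookup_weights_manager(GraphContext &ctx, Target target)
    {
        WeightsManagerContext *wm_ctx = ctx.weights_management_ctx(target);
        return (wm_ctx != nullptr) ? wm_ctx->wm : nullptr;
    }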
diff --git a/src/graph/TypeLoader.cpp b/src/graph/TypeLoader.cpp
index b63672b..81a405b 100644
--- a/src/graph/TypeLoader.cpp
+++ b/src/graph/TypeLoader.cpp
@@ -131,7 +131,6 @@
static const std::map<std::string, DepthwiseConvolutionMethod> methods =
{
{ "default", DepthwiseConvolutionMethod::Default },
- { "gemv", DepthwiseConvolutionMethod::GEMV },
{ "optimized3x3", DepthwiseConvolutionMethod::Optimized3x3 },
};
diff --git a/src/graph/backends/CL/CLDeviceBackend.cpp b/src/graph/backends/CL/CLDeviceBackend.cpp
index 0666ec0..de31847 100644
--- a/src/graph/backends/CL/CLDeviceBackend.cpp
+++ b/src/graph/backends/CL/CLDeviceBackend.cpp
@@ -34,11 +34,13 @@
#include "arm_compute/graph/backends/CL/CLSubTensorHandle.h"
#include "arm_compute/graph/backends/CL/CLTensorHandle.h"
+#include "arm_compute/core/CL/CLCoreRuntimeContext.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/BlobLifetimeManager.h"
#include "arm_compute/runtime/CL/CLBufferAllocator.h"
-#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/IWeightsManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/MemoryManagerOnDemand.h"
#include "arm_compute/runtime/PoolManager.h"
@@ -90,9 +92,8 @@
{
// Setup Scheduler
CLScheduler::get().default_init(&_tuner);
-
// Create allocator with new context
- _allocator = support::cpp14::make_unique<CLBufferAllocator>();
+ _allocator = support::cpp14::make_unique<CLBufferAllocator>(nullptr /* legacy path for CLCoreRuntimeContext */);
}
void CLDeviceBackend::release_backend_context(GraphContext &ctx)
@@ -132,11 +133,21 @@
mm_ctx.target = Target::CL;
mm_ctx.intra_mm = create_memory_manager(MemoryManagerAffinity::Buffer);
mm_ctx.cross_mm = create_memory_manager(MemoryManagerAffinity::Buffer);
- mm_ctx.cross_group = std::make_shared<CLMemoryGroup>(mm_ctx.cross_mm);
+ mm_ctx.cross_group = std::make_shared<MemoryGroup>(mm_ctx.cross_mm);
mm_ctx.allocator = _allocator.get();
ctx.insert_memory_management_ctx(std::move(mm_ctx));
}
+
+ // Create function level weights manager
+ if(ctx.weights_management_ctx(Target::CL) == nullptr)
+ {
+ WeightsManagerContext wm_ctx;
+ wm_ctx.target = Target::CL;
+ wm_ctx.wm = create_weights_manager();
+
+ ctx.insert_weights_management_ctx(std::move(wm_ctx));
+ }
}
bool CLDeviceBackend::is_backend_supported()
@@ -204,6 +215,12 @@
return mm;
}
+
+std::shared_ptr<arm_compute::IWeightsManager> CLDeviceBackend::create_weights_manager()
+{
+ auto weights_mgr = std::make_shared<IWeightsManager>();
+ return weights_mgr;
+}
} // namespace backends
} // namespace graph
} // namespace arm_compute
diff --git a/src/graph/backends/CL/CLFunctionsFactory.cpp b/src/graph/backends/CL/CLFunctionsFactory.cpp
index 82b6dd6..57b48b0 100644
--- a/src/graph/backends/CL/CLFunctionsFactory.cpp
+++ b/src/graph/backends/CL/CLFunctionsFactory.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/GraphContext.h"
#include "arm_compute/graph/backends/FunctionHelpers.h"
#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CPP/CPPFunctions.h"
@@ -56,13 +57,6 @@
using WinogradConvolutionLayer = CLWinogradConvolutionLayer;
};
-/** Collection of CL depthwise convolution functions */
-struct CLDepthwiseConvolutionLayerFunctions
-{
- using GenericDepthwiseConvolutionLayer = CLDepthwiseConvolutionLayer;
- using OptimizedDepthwiseConvolutionLayer = CLDepthwiseConvolutionLayer3x3;
-};
-
/** Collection of CL element-wise functions */
struct CLEltwiseFunctions
{
@@ -249,7 +243,9 @@
case NodeType::ConcatenateLayer:
return detail::create_concatenate_layer<CLConcatenateLayer, CLTargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
case NodeType::DepthwiseConvolutionLayer:
- return detail::create_depthwise_convolution_layer<CLDepthwiseConvolutionLayerFunctions, CLTargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+ return detail::create_depthwise_convolution_layer<CLDepthwiseConvolutionLayer, CLTargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+ case NodeType::DequantizationLayer:
+ return detail::create_dequantization_layer<CLDequantizationLayer, CLTargetInfo>(*polymorphic_downcast<DequantizationLayerNode *>(node));
case NodeType::DetectionOutputLayer:
return detail::create_detection_output_layer<CPPDetectionOutputLayer, CLTargetInfo>(*polymorphic_downcast<DetectionOutputLayerNode *>(node));
case NodeType::DetectionPostProcessLayer:
@@ -261,9 +257,9 @@
case NodeType::FullyConnectedLayer:
return detail::create_fully_connected_layer<CLFullyConnectedLayer, CLTargetInfo>(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
case NodeType::FusedConvolutionBatchNormalizationLayer:
- return detail::create_fused_convolution_batch_normalization_layer<CLFusedLayerTypes, CLTargetInfo>(*polymorphic_downcast<FusedConvolutionBatchNormalizationNode *>(node));
+ return detail::create_fused_convolution_batch_normalization_layer<CLFusedLayerTypes, CLTargetInfo>(*polymorphic_downcast<FusedConvolutionBatchNormalizationNode *>(node), ctx);
case NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer:
- return detail::create_fused_depthwise_convolution_batch_normalization_layer<CLFusedLayerTypes, CLTargetInfo>(*polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node));
+ return detail::create_fused_depthwise_convolution_batch_normalization_layer<CLFusedLayerTypes, CLTargetInfo>(*polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node), ctx);
case NodeType::GenerateProposalsLayer:
return detail::create_generate_proposals_layer<CLGenerateProposalsLayer, CLTargetInfo>(*polymorphic_downcast<GenerateProposalsLayerNode *>(node), ctx);
case NodeType::NormalizationLayer:
diff --git a/src/graph/backends/CL/CLNodeValidator.cpp b/src/graph/backends/CL/CLNodeValidator.cpp
index 40ec508..8ca58bc 100644
--- a/src/graph/backends/CL/CLNodeValidator.cpp
+++ b/src/graph/backends/CL/CLNodeValidator.cpp
@@ -58,8 +58,9 @@
CLGEMMConvolutionLayer,
CLWinogradConvolutionLayer>(*polymorphic_downcast<ConvolutionLayerNode *>(node));
case NodeType::DepthwiseConvolutionLayer:
- return detail::validate_depthwise_convolution_layer<CLDepthwiseConvolutionLayer,
- CLDepthwiseConvolutionLayer3x3>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+ return detail::validate_depthwise_convolution_layer<CLDepthwiseConvolutionLayer>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+ case NodeType::DequantizationLayer:
+ return detail::validate_dequantization_layer<CLDequantizationLayer>(*polymorphic_downcast<DequantizationLayerNode *>(node));
case NodeType::DetectionOutputLayer:
return detail::validate_detection_output_layer<CPPDetectionOutputLayer>(*polymorphic_downcast<DetectionOutputLayerNode *>(node));
case NodeType::DetectionPostProcessLayer:
diff --git a/src/graph/backends/CL/CLTensorHandle.cpp b/src/graph/backends/CL/CLTensorHandle.cpp
index 219d9d0..891c784 100644
--- a/src/graph/backends/CL/CLTensorHandle.cpp
+++ b/src/graph/backends/CL/CLTensorHandle.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,8 +23,7 @@
*/
#include "arm_compute/graph/backends/CL/CLTensorHandle.h"
-#include "arm_compute/core/utils/misc/Cast.h"
-#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+#include "arm_compute/runtime/IMemoryGroup.h"
namespace arm_compute
{
@@ -52,8 +51,7 @@
{
if(mg != nullptr)
{
- auto *cl_mg = arm_compute::utils::cast::polymorphic_downcast<CLMemoryGroup *>(mg);
- cl_mg->manage(&_tensor);
+ mg->manage(&_tensor);
}
}
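The same simplification repeats in the GLES and NEON tensor handles further down: with the backend-specific groups folded into a single MemoryGroup, IMemoryGroup exposes manage() directly and the downcast disappears. A condensed sketch of the resulting pattern, assuming the tensor type satisfies whatever interface manage() expects in this release:

    #include "arm_compute/runtime/IMemoryGroup.h"

    // Hand any backend tensor straight to the group through the interface.
    template <typename TensorT>
    void manage_through_interface(arm_compute::IMemoryGroup *mg, TensorT &tensor)
    {
        if(mg != nullptr)
        {
            mg->manage(&tensor); // virtual dispatch replaces the per-backend downcast
        }
    }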
diff --git a/src/graph/backends/GLES/GCDeviceBackend.cpp b/src/graph/backends/GLES/GCDeviceBackend.cpp
index 5f0bf3f..83e2436 100644
--- a/src/graph/backends/GLES/GCDeviceBackend.cpp
+++ b/src/graph/backends/GLES/GCDeviceBackend.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,8 +36,8 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/BlobLifetimeManager.h"
#include "arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h"
-#include "arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h"
#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
+#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/MemoryManagerOnDemand.h"
#include "arm_compute/runtime/PoolManager.h"
@@ -85,7 +85,7 @@
mm_ctx.target = Target::GC;
mm_ctx.intra_mm = create_memory_manager(MemoryManagerAffinity::Buffer);
mm_ctx.cross_mm = create_memory_manager(MemoryManagerAffinity::Buffer);
- mm_ctx.cross_group = std::make_shared<GCMemoryGroup>(mm_ctx.cross_mm);
+ mm_ctx.cross_group = std::make_shared<MemoryGroup>(mm_ctx.cross_mm);
mm_ctx.allocator = &_allocator;
ctx.insert_memory_management_ctx(std::move(mm_ctx));
@@ -154,6 +154,11 @@
return mm;
}
+
+std::shared_ptr<arm_compute::IWeightsManager> GCDeviceBackend::create_weights_manager()
+{
+ return nullptr;
+}
} // namespace backends
} // namespace graph
} // namespace arm_compute
diff --git a/src/graph/backends/GLES/GCFunctionsFactory.cpp b/src/graph/backends/GLES/GCFunctionsFactory.cpp
index 13543db..b9562c7 100644
--- a/src/graph/backends/GLES/GCFunctionsFactory.cpp
+++ b/src/graph/backends/GLES/GCFunctionsFactory.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/core/utils/misc/Cast.h"
#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/GraphContext.h"
#include "arm_compute/graph/backends/FunctionHelpers.h"
#include "arm_compute/runtime/GLES_COMPUTE/GCFunctions.h"
diff --git a/src/graph/backends/GLES/GCNodeValidator.cpp b/src/graph/backends/GLES/GCNodeValidator.cpp
index 9cbb9a1..15a66f4 100644
--- a/src/graph/backends/GLES/GCNodeValidator.cpp
+++ b/src/graph/backends/GLES/GCNodeValidator.cpp
@@ -58,7 +58,6 @@
// TODO (geopin01) : Switch when validation is implemented
// Validate function
ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->tensor_shape().x() != 3 && weights->tensor_shape().y() != 3, "Unsupported depthwise convolution");
- node.set_depthwise_convolution_method(DepthwiseConvolutionMethod::Optimized3x3);
return Status{};
}
@@ -111,6 +110,8 @@
return validate_convolution_layer(*polymorphic_downcast<ConvolutionLayerNode *>(node));
case NodeType::DepthwiseConvolutionLayer:
return validate_depthwise_convolution_layer(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+ case NodeType::DequantizationLayer:
+ return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : DequantizationLayer");
case NodeType::DetectionOutputLayer:
return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : DetectionOutputLayer");
case NodeType::DetectionPostProcessLayer:
diff --git a/src/graph/backends/GLES/GCTensorHandle.cpp b/src/graph/backends/GLES/GCTensorHandle.cpp
index 4e5c652..8f59262 100644
--- a/src/graph/backends/GLES/GCTensorHandle.cpp
+++ b/src/graph/backends/GLES/GCTensorHandle.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -23,8 +23,7 @@
*/
#include "arm_compute/graph/backends/GLES/GCTensorHandle.h"
-#include "arm_compute/core/utils/misc/Cast.h"
-#include "arm_compute/runtime/GLES_COMPUTE/GCMemoryGroup.h"
+#include "arm_compute/runtime/IMemoryGroup.h"
namespace arm_compute
{
@@ -52,8 +51,7 @@
{
if(mg != nullptr)
{
- auto *gc_mg = arm_compute::utils::cast::polymorphic_downcast<GCMemoryGroup *>(mg);
- gc_mg->manage(&_tensor);
+ mg->manage(&_tensor);
}
}
diff --git a/src/graph/backends/NEON/NEDeviceBackend.cpp b/src/graph/backends/NEON/NEDeviceBackend.cpp
index f94cd97..017b4f0 100644
--- a/src/graph/backends/NEON/NEDeviceBackend.cpp
+++ b/src/graph/backends/NEON/NEDeviceBackend.cpp
@@ -37,6 +37,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/Allocator.h"
#include "arm_compute/runtime/BlobLifetimeManager.h"
+#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/MemoryManagerOnDemand.h"
#include "arm_compute/runtime/OffsetLifetimeManager.h"
@@ -90,6 +91,16 @@
ctx.insert_memory_management_ctx(std::move(mm_ctx));
}
+
+ // Create function level weights manager
+ if(ctx.weights_management_ctx(Target::NEON) == nullptr)
+ {
+ WeightsManagerContext wm_ctx;
+ wm_ctx.target = Target::NEON;
+ wm_ctx.wm = create_weights_manager();
+
+ ctx.insert_weights_management_ctx(std::move(wm_ctx));
+ }
}
bool NEDeviceBackend::is_backend_supported()
@@ -159,6 +170,12 @@
return mm;
}
+
+std::shared_ptr<arm_compute::IWeightsManager> NEDeviceBackend::create_weights_manager()
+{
+ auto weights_mgr = std::make_shared<IWeightsManager>();
+ return weights_mgr;
+}
} // namespace backends
} // namespace graph
} // namespace arm_compute
diff --git a/src/graph/backends/NEON/NEFunctionFactory.cpp b/src/graph/backends/NEON/NEFunctionFactory.cpp
index 852de54..7ff68b5 100644
--- a/src/graph/backends/NEON/NEFunctionFactory.cpp
+++ b/src/graph/backends/NEON/NEFunctionFactory.cpp
@@ -62,13 +62,6 @@
using WinogradConvolutionLayer = NEWinogradConvolutionLayer;
};
-/** Collection of CL depthwise convolution functions */
-struct NEDepthwiseConvolutionLayerFunctions
-{
- using GenericDepthwiseConvolutionLayer = NEDepthwiseConvolutionLayer;
- using OptimizedDepthwiseConvolutionLayer = NEDepthwiseConvolutionLayerOptimized;
-};
-
/** Collection of CL element-wise functions */
struct NEEltwiseFunctions
{
@@ -115,6 +108,7 @@
std::shared_ptr<IMemoryManager> mm = get_memory_manager(ctx, Target::NEON);
std::unique_ptr<IFunction> func;
std::string func_name;
+
if(conv_algorithm == ConvolutionMethod::Direct)
{
std::tie(func, func_name) = create_named_memory_managed_function<NEDirectConvolutionLayer>(
@@ -212,11 +206,13 @@
case NodeType::ConcatenateLayer:
return detail::create_concatenate_layer<NEConcatenateLayer, NETargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
case NodeType::DepthwiseConvolutionLayer:
- return detail::create_depthwise_convolution_layer<NEDepthwiseConvolutionLayerFunctions, NETargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+ return detail::create_depthwise_convolution_layer<NEDepthwiseConvolutionLayer, NETargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+ case NodeType::DequantizationLayer:
+ return detail::create_dequantization_layer<NEDequantizationLayer, NETargetInfo>(*polymorphic_downcast<DequantizationLayerNode *>(node));
case NodeType::DetectionOutputLayer:
return detail::create_detection_output_layer<CPPDetectionOutputLayer, NETargetInfo>(*polymorphic_downcast<DetectionOutputLayerNode *>(node));
case NodeType::DetectionPostProcessLayer:
- return detail::create_detection_post_process_layer<CPPDetectionPostProcessLayer, NETargetInfo>(*polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
+ return detail::create_detection_post_process_layer<NEDetectionPostProcessLayer, NETargetInfo>(*polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
case NodeType::EltwiseLayer:
return detail::create_eltwise_layer<NEEltwiseFunctions, NETargetInfo>(*polymorphic_downcast<EltwiseLayerNode *>(node));
case NodeType::FlattenLayer:
@@ -224,9 +220,9 @@
case NodeType::FullyConnectedLayer:
return detail::create_fully_connected_layer<NEFullyConnectedLayer, NETargetInfo>(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
case NodeType::FusedConvolutionBatchNormalizationLayer:
- return detail::create_fused_convolution_batch_normalization_layer<NEFusedLayerTypes, NETargetInfo>(*polymorphic_downcast<FusedConvolutionBatchNormalizationNode *>(node));
+ return detail::create_fused_convolution_batch_normalization_layer<NEFusedLayerTypes, NETargetInfo>(*polymorphic_downcast<FusedConvolutionBatchNormalizationNode *>(node), ctx);
case NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer:
- return detail::create_fused_depthwise_convolution_batch_normalization_layer<NEFusedLayerTypes, NETargetInfo>(*polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node));
+ return detail::create_fused_depthwise_convolution_batch_normalization_layer<NEFusedLayerTypes, NETargetInfo>(*polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node), ctx);
case NodeType::NormalizationLayer:
return detail::create_normalization_layer<NENormalizationLayer, NETargetInfo>(*polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
case NodeType::PermuteLayer:
diff --git a/src/graph/backends/NEON/NENodeValidator.cpp b/src/graph/backends/NEON/NENodeValidator.cpp
index 734b340..fc84959 100644
--- a/src/graph/backends/NEON/NENodeValidator.cpp
+++ b/src/graph/backends/NEON/NENodeValidator.cpp
@@ -58,12 +58,13 @@
NEGEMMConvolutionLayer,
NEWinogradConvolutionLayer>(*polymorphic_downcast<ConvolutionLayerNode *>(node));
case NodeType::DepthwiseConvolutionLayer:
- return detail::validate_depthwise_convolution_layer<NEDepthwiseConvolutionLayer,
- NEDepthwiseConvolutionLayer3x3>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+ return detail::validate_depthwise_convolution_layer<NEDepthwiseConvolutionLayer>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
+ case NodeType::DequantizationLayer:
+ return detail::validate_dequantization_layer<NEDequantizationLayer>(*polymorphic_downcast<DequantizationLayerNode *>(node));
case NodeType::DetectionOutputLayer:
return detail::validate_detection_output_layer<CPPDetectionOutputLayer>(*polymorphic_downcast<DetectionOutputLayerNode *>(node));
case NodeType::DetectionPostProcessLayer:
- return detail::validate_detection_post_process_layer<CPPDetectionPostProcessLayer>(*polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
+ return detail::validate_detection_post_process_layer<NEDetectionPostProcessLayer>(*polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
case NodeType::GenerateProposalsLayer:
return ARM_COMPUTE_CREATE_ERROR(arm_compute::ErrorCode::RUNTIME_ERROR, "Unsupported operation : GenerateProposalsLayer");
case NodeType::NormalizePlanarYUVLayer:
diff --git a/src/graph/backends/NEON/NETensorHandle.cpp b/src/graph/backends/NEON/NETensorHandle.cpp
index 5892116..d58c45b 100644
--- a/src/graph/backends/NEON/NETensorHandle.cpp
+++ b/src/graph/backends/NEON/NETensorHandle.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -52,8 +52,7 @@
{
if(mg != nullptr)
{
- auto *ne_mg = arm_compute::utils::cast::polymorphic_downcast<MemoryGroup *>(mg);
- ne_mg->manage(&_tensor);
+ mg->manage(&_tensor);
}
}
diff --git a/src/graph/mutators/NodeFusionMutator.cpp b/src/graph/mutators/NodeFusionMutator.cpp
index f7f3454..abd6436 100644
--- a/src/graph/mutators/NodeFusionMutator.cpp
+++ b/src/graph/mutators/NodeFusionMutator.cpp
@@ -71,11 +71,10 @@
FastMathHint fast_math_hint = conv_node->fast_math_hint();
// Extract bn inputs
- const auto bn_mean_id = bn_node->input_edge(1)->producer_id();
- const auto bn_var_id = bn_node->input_edge(2)->producer_id();
- const auto bn_beta_id = bn_node->input_edge(3)->producer_id();
- const auto bn_gamma_id = bn_node->input_edge(4)->producer_id();
- const auto epsilon = bn_node->epsilon();
+ const auto bn_mean_id = bn_node->input_edge(1)->producer_id();
+ const auto bn_var_id = bn_node->input_edge(2)->producer_id();
+
+ const auto epsilon = bn_node->epsilon();
// Create the fused node
const NodeID fused_id = g.add_node<FusedConvolutionBatchNormalizationNode>(epsilon, conv_info, num_groups, conv_method, fast_math_hint, act_info);
@@ -91,8 +90,18 @@
g.add_connection(conv_weights_id, 0, fused_id, 1);
g.add_connection(bn_mean_id, 0, fused_id, 3);
g.add_connection(bn_var_id, 0, fused_id, 4);
- g.add_connection(bn_beta_id, 0, fused_id, 5);
- g.add_connection(bn_gamma_id, 0, fused_id, 6);
+
+ if(bn_node->input_edge(3) != nullptr)
+ {
+ const auto bn_beta_id = bn_node->input_edge(3)->producer_id();
+ g.add_connection(bn_beta_id, 0, fused_id, 5);
+ }
+
+ if(bn_node->input_edge(4) != nullptr)
+ {
+ const auto bn_gamma_id = bn_node->input_edge(4)->producer_id();
+ g.add_connection(bn_gamma_id, 0, fused_id, 6);
+ }
auto fused_node = g.node(fused_id);
std::vector<NodeIdxPair> bn_driving_nodes = get_driving_nodes(*bn_node);
@@ -300,22 +309,12 @@
return (output_qasymm8 && same_qinfo) || !output_qasymm8;
};
- Target target = g.nodes()[0].get()->output(0)->desc().target;
-
// Fusion mutations
detail::fuse_layer<BatchNormalizationLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<BatchNormalizationLayerNode>, supported_fused_activations);
detail::fuse_layer<ConvolutionLayerNode, ActivationLayerNode>(g, empty_prec, detail::fuse_node_with_activation<ConvolutionLayerNode>, supported_fused_activations);
detail::fuse_layer<DepthwiseConvolutionLayerNode, ActivationLayerNode>(g, qs8_prec, detail::fuse_node_with_activation<DepthwiseConvolutionLayerNode>, supported_fused_activations);
-
- // Currently fuse batch normalization brings performance uplift only on OpenCL with FP32 data type
- // TODO (COMPMID-2524): Fuse batch normalization with convolution and depthwise convolution at graph level for NEON - FP32
- // TODO (COMPMID-2581): Fuse batch normalization with convolution and depthwise convolution at graph level for OpenCL - FP16
- if(target == Target::CL && (g.nodes()[0].get()->output(0)->desc().data_type == DataType::F32))
- {
- //Depthwise Convolution and Batch Normalization Fusion active only for CL
- detail::fuse_layer<ConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_convolution_with_batch_normalization);
- detail::fuse_layer<DepthwiseConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_depthwise_convolution_with_batch_normalization);
- }
+ detail::fuse_layer<ConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_convolution_with_batch_normalization);
+ detail::fuse_layer<DepthwiseConvolutionLayerNode, BatchNormalizationLayerNode>(g, empty_prec, detail::fuse_depthwise_convolution_with_batch_normalization);
}
} // namespace graph
} // namespace arm_compute
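Making beta and gamma optional is consistent with the batch-normalization definition the backends already use, where a missing beta behaves as 0 and a missing gamma as 1; in terms of the inputs wired into the fused node above:

    // Batch normalization computed by the fused node, with epsilon taken
    // from bn_node->epsilon():
    //
    //   y = gamma * (x - mean) / sqrt(var + epsilon) + beta
    //
    // When input_edge(3) is absent, beta defaults to 0; when input_edge(4)
    // is absent, gamma defaults to 1, so the fusion remains well-defined.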
diff --git a/src/graph/nodes/DeconvolutionLayerNode.cpp b/src/graph/nodes/DeconvolutionLayerNode.cpp
index 28c7529..d4a5b76 100644
--- a/src/graph/nodes/DeconvolutionLayerNode.cpp
+++ b/src/graph/nodes/DeconvolutionLayerNode.cpp
@@ -56,10 +56,7 @@
const unsigned int kernel_width = get_dimension_size(weights_descriptor, DataLayoutDimension::WIDTH);
const unsigned int kernel_height = get_dimension_size(weights_descriptor, DataLayoutDimension::HEIGHT);
- std::tie(output_width, output_height) = deconvolution_output_dimensions(input_width, input_height,
- kernel_width, kernel_height,
- info.pad().first, info.pad().second,
- info.stride().first, info.stride().second);
+ std::tie(output_width, output_height) = deconvolution_output_dimensions(input_width, input_height, kernel_width, kernel_height, info);
const DataLayout data_layout = input_descriptor.layout;
TensorDescriptor output_descriptor = input_descriptor;
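The refactored call hands the whole PadStrideInfo to the helper instead of pre-unpacked pads and strides, which also lets the helper see asymmetric padding. The extents it computes follow the standard transposed-convolution relation, restated per dimension below (the helper name is illustrative):

    // Standard transposed-convolution output extent for one dimension.
    unsigned int deconv_out_dim(unsigned int in, unsigned int kernel,
                                unsigned int stride, unsigned int pad_left,
                                unsigned int pad_right)
    {
        return (in - 1) * stride + kernel - pad_left - pad_right;
    }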
diff --git a/src/graph/nodes/DequantizationLayerNode.cpp b/src/graph/nodes/DequantizationLayerNode.cpp
new file mode 100644
index 0000000..27134b4
--- /dev/null
+++ b/src/graph/nodes/DequantizationLayerNode.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/graph/nodes/DequantizationLayerNode.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/graph/Graph.h"
+#include "arm_compute/graph/INodeVisitor.h"
+#include "arm_compute/graph/Tensor.h"
+
+namespace arm_compute
+{
+namespace graph
+{
+DequantizationLayerNode::DequantizationLayerNode()
+{
+ _input_edges.resize(1, EmptyEdgeID);
+ _outputs.resize(1, NullTensorID);
+}
+
+bool DequantizationLayerNode::forward_descriptors()
+{
+ if((input_id(0) != NullTensorID) && (output_id(0) != NullTensorID))
+ {
+ Tensor *dst = output(0);
+ ARM_COMPUTE_ERROR_ON(dst == nullptr);
+ dst->desc() = configure_output(0);
+ return true;
+ }
+ return false;
+}
+
+TensorDescriptor DequantizationLayerNode::configure_output(size_t idx) const
+{
+ ARM_COMPUTE_UNUSED(idx);
+ ARM_COMPUTE_ERROR_ON(idx >= _outputs.size());
+
+ const Tensor *src = input(0);
+ ARM_COMPUTE_ERROR_ON(src == nullptr);
+
+ TensorDescriptor output_desc = src->desc();
+ output_desc.data_type = DataType::F32;
+
+ return output_desc;
+}
+
+NodeType DequantizationLayerNode::type() const
+{
+ return NodeType::DequantizationLayer;
+}
+
+void DequantizationLayerNode::accept(INodeVisitor &v)
+{
+ v.visit(*this);
+}
+} // namespace graph
+} // namespace arm_compute
\ No newline at end of file
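configure_output above only rewrites the descriptor to F32; the numeric mapping itself is done by the backend functions and, for QASYMM8 inputs, follows the usual affine dequantization (a standalone restatement for reference, not the library's own helper):

    #include <cstdint>

    // Affine dequantization of a QASYMM8 value with quantization
    // info {scale, offset}: f = scale * (q - offset).
    float dequantize_value(std::uint8_t q, float scale, std::int32_t offset)
    {
        return scale * (static_cast<float>(q) - static_cast<float>(offset));
    }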