Implement "Greedy by size planner" memory optimization
The planner optimizes intermediate tensor memory allocation to reduce the memory footprint. See http://arxiv.org/abs/2001.03288 for details.
This optimization can be disabled at compile time by adding -DXNN_ENABLE_MEM_OPT=OFF (CMake) or --define=xnnpack_enable_memopt=false (Bazel).
PiperOrigin-RevId: 311471060
diff --git a/test/memory-planner-test.cc b/test/memory-planner-test.cc
new file mode 100644
index 0000000..3f69ef3
--- /dev/null
+++ b/test/memory-planner-test.cc
@@ -0,0 +1,192 @@
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <xnnpack.h>
+#include <xnnpack/memory-planner.h>
+#include <xnnpack/subgraph.h>
+
+#include <gtest/gtest.h>
+
+// Verifies that xnn_init_value_allocation_tracker computes each value's live
+// range ([first_node, last_node]) from the nodes that produce/consume it.
+TEST(MemoryPlanner, ValueLiveInfo) {
+ EXPECT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
+ // Create simple subgraph where it has 2 nodes and 4 tensors as illustrated below:
+ // T0 ----> N0 ----> T2 and T2 ----> N1 ----> T3
+ // T1 ----/              T1 ----/
+ // NOTE(review): subgraph/nodes are stack-allocated and only partially
+ // initialized — this assumes the tracker reads nothing beyond num_values,
+ // num_nodes, and each node's inputs/outputs arrays. TODO confirm.
+ struct xnn_subgraph subgraph;
+ subgraph.num_values = 4;
+ subgraph.num_nodes = 2;
+ struct xnn_node nodes[2];
+ // N0: inputs T0, T1; output T2.
+ nodes[0].num_inputs = 2;
+ nodes[0].inputs[0] = 0;
+ nodes[0].inputs[1] = 1;
+ nodes[0].num_outputs = 1;
+ nodes[0].outputs[0] = 2;
+
+ // N1: inputs T1, T2; output T3.
+ nodes[1].num_inputs = 2;
+ nodes[1].inputs[0] = 1;
+ nodes[1].inputs[1] = 2;
+ nodes[1].num_outputs = 1;
+ nodes[1].outputs[0] = 3;
+ subgraph.nodes = nodes;
+
+ struct xnn_value_allocation_tracker tracker;
+ xnn_init_value_allocation_tracker(&tracker, &subgraph);
+
+ // T0 is only touched by N0.
+ EXPECT_EQ(0, tracker.usage[0].first_node);
+ EXPECT_EQ(0, tracker.usage[0].last_node);
+
+ // T1 is read by both N0 and N1, so it must stay live across [0, 1].
+ EXPECT_EQ(0, tracker.usage[1].first_node);
+ EXPECT_EQ(1, tracker.usage[1].last_node);
+
+ // T2 is produced by N0 and consumed by N1.
+ EXPECT_EQ(0, tracker.usage[2].first_node);
+ EXPECT_EQ(1, tracker.usage[2].last_node);
+
+ // T3 is only produced by N1.
+ EXPECT_EQ(1, tracker.usage[3].first_node);
+ EXPECT_EQ(1, tracker.usage[3].last_node);
+
+ xnn_release_value_allocation_tracker(&tracker);
+}
+
+// Verifies that the greedy-by-size planner shares memory between values whose
+// live ranges do not overlap, producing a smaller arena than naive
+// back-to-back packing.
+//
+// Live ranges / sizes: v0 [1,1]/56, v1 [0,1]/40, v2 [1,1]/64, v3 [0,0]/152,
+// v4 [1,1]/20. Only {v1, v3} are live at node 0 and {v0, v1, v2, v4} at
+// node 1, so v3's 152-byte block can be reused by node-1-only values.
+TEST(MemoryPlanner, MemoryBlocksCoalescing) {
+ EXPECT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
+ struct xnn_subgraph subgraph;
+ subgraph.num_nodes = 0;
+ subgraph.num_values = 5;
+ struct xnn_value_allocation_tracker tracker;
+ xnn_init_value_allocation_tracker(&tracker, &subgraph);
+ // As this is an empty subgraph, we create the following xnn_value_usage stub.
+ tracker.usage[0] = (struct xnn_value_usage){
+ .first_node = 1,
+ .last_node = 1,
+ };
+ xnn_add_value_allocation_tracker(&tracker, 0, 56);
+
+ tracker.usage[1] = (struct xnn_value_usage){
+ .first_node = 0,
+ .last_node = 1,
+ };
+ xnn_add_value_allocation_tracker(&tracker, 1, 40);
+
+ tracker.usage[2] = (struct xnn_value_usage){
+ .first_node = 1,
+ .last_node = 1,
+ };
+ xnn_add_value_allocation_tracker(&tracker, 2, 64);
+
+ tracker.usage[3] = (struct xnn_value_usage){
+ .first_node = 0,
+ .last_node = 0,
+ };
+ xnn_add_value_allocation_tracker(&tracker, 3, 152);
+
+ tracker.usage[4] = (struct xnn_value_usage){
+ .first_node = 1,
+ .last_node = 1,
+ };
+ xnn_add_value_allocation_tracker(&tracker, 4, 20);
+
+ xnn_plan_value_allocation_tracker(&tracker);
+
+#if XNN_ENABLE_MEMOPT
+ // Greedy by size: v3 (152) at 0; v2 (64) reuses [0,64) as its lifetime
+ // doesn't overlap v3's; v0 at 64; v1 at 152 (overlaps everything placed);
+ // v4 fits the hole [120,152). Arena = 152 + 40 = 192 < 332.
+ EXPECT_EQ(192, tracker.mem_arena_size);
+ EXPECT_EQ(64, tracker.usage[0].alloc_offset);
+ EXPECT_EQ(152, tracker.usage[1].alloc_offset);
+ EXPECT_EQ(0, tracker.usage[2].alloc_offset);
+ EXPECT_EQ(0, tracker.usage[3].alloc_offset);
+ EXPECT_EQ(120, tracker.usage[4].alloc_offset);
+#else
+ // Without the optimization, values are packed back-to-back in declaration
+ // order: 56 + 40 + 64 + 152 + 20 = 332 bytes total.
+ EXPECT_EQ(332, tracker.mem_arena_size);
+ EXPECT_EQ(0, tracker.usage[0].alloc_offset);
+ // Fixed: offset[1] = offset[0] + size[0] = 0 + 56 = 56 (the previous
+ // expectation of 57 contradicted offset[2] = 96 = 56 + 40 below).
+ EXPECT_EQ(56, tracker.usage[1].alloc_offset);
+ EXPECT_EQ(96, tracker.usage[2].alloc_offset);
+ EXPECT_EQ(160, tracker.usage[3].alloc_offset);
+ EXPECT_EQ(312, tracker.usage[4].alloc_offset);
+#endif
+
+ xnn_release_value_allocation_tracker(&tracker);
+}
+
+// Exercises the greedy-by-size planner on a longer chain of 8 values with
+// staggered, overlapping live ranges (first_node/last_node set directly on
+// the tracker since the subgraph is empty):
+//   v0 [0,1]/32  v1 [1,4]/28  v2 [2,5]/36  v3 [3,5]/16
+//   v4 [4,5]/8   v5 [5,7]/64  v6 [6,8]/10  v7 [7,8]/40
+TEST(MemoryPlanner, GeneralPlanning) {
+ EXPECT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
+ struct xnn_subgraph subgraph;
+ subgraph.num_nodes = 0;
+ subgraph.num_values = 8;
+ struct xnn_value_allocation_tracker tracker;
+ xnn_init_value_allocation_tracker(&tracker, &subgraph);
+ // As this is an empty subgraph, we create the following xnn_value_usage stub.
+ tracker.usage[0] = (struct xnn_value_usage){
+ .first_node = 0,
+ .last_node = 1,
+ };
+ xnn_add_value_allocation_tracker(&tracker, 0, 32);
+
+ tracker.usage[1] = (struct xnn_value_usage){
+ .first_node = 1,
+ .last_node = 4,
+ };
+ xnn_add_value_allocation_tracker(&tracker, 1, 28);
+
+ tracker.usage[2] = (struct xnn_value_usage){
+ .first_node = 2,
+ .last_node = 5,
+ };
+ xnn_add_value_allocation_tracker(&tracker, 2, 36);
+
+ tracker.usage[3] = (struct xnn_value_usage){
+ .first_node = 3,
+ .last_node = 5,
+ };
+ xnn_add_value_allocation_tracker(&tracker, 3, 16);
+
+ tracker.usage[4] = (struct xnn_value_usage){
+ .first_node = 4,
+ .last_node = 5,
+ };
+ xnn_add_value_allocation_tracker(&tracker, 4, 8);
+
+ tracker.usage[5] = (struct xnn_value_usage){
+ .first_node = 5,
+ .last_node = 7,
+ };
+ xnn_add_value_allocation_tracker(&tracker, 5, 64);
+
+ tracker.usage[6] = (struct xnn_value_usage){
+ .first_node = 6,
+ .last_node = 8,
+ };
+ xnn_add_value_allocation_tracker(&tracker, 6, 10);
+
+ tracker.usage[7] = (struct xnn_value_usage){
+ .first_node = 7,
+ .last_node = 8,
+ };
+ xnn_add_value_allocation_tracker(&tracker, 7, 40);
+
+ xnn_plan_value_allocation_tracker(&tracker);
+
+#if XNN_ENABLE_MEMOPT
+ // Greedy by size (descending: v5, v7, v2, v0, v1, v3, v6, v4): each value
+ // takes the lowest offset free of temporally-overlapping, already-placed
+ // values — e.g. v5 and v0 share offset 0, v7 and v2 share offset 64.
+ // Peak is 116 + 8 = 124 bytes, versus 234 for naive packing.
+ EXPECT_EQ(124, tracker.mem_arena_size);
+ EXPECT_EQ(0, tracker.usage[0].alloc_offset);
+ EXPECT_EQ(32, tracker.usage[1].alloc_offset);
+ EXPECT_EQ(64, tracker.usage[2].alloc_offset);
+ EXPECT_EQ(100, tracker.usage[3].alloc_offset);
+ EXPECT_EQ(116, tracker.usage[4].alloc_offset);
+ EXPECT_EQ(0, tracker.usage[5].alloc_offset);
+ EXPECT_EQ(104, tracker.usage[6].alloc_offset);
+ EXPECT_EQ(64, tracker.usage[7].alloc_offset);
+#else
+ // Without the optimization, values are packed back-to-back in declaration
+ // order: 32 + 28 + 36 + 16 + 8 + 64 + 10 + 40 = 234 bytes total.
+ EXPECT_EQ(234, tracker.mem_arena_size);
+ EXPECT_EQ(0, tracker.usage[0].alloc_offset);
+ EXPECT_EQ(32, tracker.usage[1].alloc_offset);
+ EXPECT_EQ(60, tracker.usage[2].alloc_offset);
+ EXPECT_EQ(96, tracker.usage[3].alloc_offset);
+ EXPECT_EQ(112, tracker.usage[4].alloc_offset);
+ EXPECT_EQ(120, tracker.usage[5].alloc_offset);
+ EXPECT_EQ(184, tracker.usage[6].alloc_offset);
+ EXPECT_EQ(194, tracker.usage[7].alloc_offset);
+#endif
+
+ xnn_release_value_allocation_tracker(&tracker);
+}