// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

#include <xnnpack/allocator.h>  // for xnn_allocate_zero_memory / xnn_release_memory
#include <xnnpack/memory-planner.h>
#include <xnnpack/subgraph.h>

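// Memory planner for the intermediate tensor values of a subgraph: it tracks the size and the node lifetime of every
// value, then assigns each value an offset into a single shared memory arena so that values with non-overlapping
// lifetimes can reuse the same space.
//
// Typical usage: xnn_init_value_allocation_tracker() once per subgraph, xnn_add_value_allocation_tracker() for every
// value that needs arena space, and finally xnn_plan_value_allocation_tracker() to compute each value's
// 'alloc_offset' and the total 'mem_arena_size'.
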
// Checks whether the lifecycles (live ranges) of two xnn_values overlap.
inline static bool value_lifecycle_overlap(const struct xnn_value_usage* a, const struct xnn_value_usage* b) {
  assert(a->last_node >= a->first_node);
  assert(b->last_node >= b->first_node);
  if (a->first_node < b->first_node) {
    return a->last_node >= b->first_node;
  } else {
    return b->last_node >= a->first_node;
  }
}

// Comparison function used to sort xnn_value_usage pointers according to tensor_size in decreasing order.
static inline int cmp_value_usage_tensor_size(const void* a, const void* b) {
  const size_t tensor_size_a = (*(struct xnn_value_usage**)a)->tensor_size;
  const size_t tensor_size_b = (*(struct xnn_value_usage**)b)->tensor_size;
  return (tensor_size_b > tensor_size_a) - (tensor_size_b < tensor_size_a);
}

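// Computes the [first_node, last_node] live range of every value in 'subgraph', i.e. the first and the last node
// that uses the value as an input or produces it as an output.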
static void populate_value_lifecycle(const xnn_subgraph_t subgraph, struct xnn_value_usage* usage) {
  assert(subgraph != NULL);
  if (subgraph->num_nodes == 0) {
    return;
  }
  // first_node/last_node of every xnn_value_usage were zero-initialized in xnn_init_value_allocation_tracker, so a
  // value of 0 cannot distinguish "not set yet" from "used by node 0". Start from the second node, using 0 as the
  // "not set yet" marker, and handle the first node last.
  for (uint32_t nid = 1; nid < subgraph->num_nodes; ++nid) {
    const struct xnn_node* node = subgraph->nodes + nid;
    for (uint32_t i = 0; i < node->num_inputs; ++i) {
      if (usage[node->inputs[i]].first_node == 0) {
        usage[node->inputs[i]].first_node = nid;
      }
      usage[node->inputs[i]].last_node = nid;
    }
    for (uint32_t i = 0; i < node->num_outputs; ++i) {
      if (usage[node->outputs[i]].first_node == 0) {
        usage[node->outputs[i]].first_node = nid;
      }
      usage[node->outputs[i]].last_node = nid;
    }
  }
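  // Finally, mark the values touched by the first node: their first_node is 0 by definition, and their last_node is
  // either still 0 (never used again) or was already set by the loop above.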
  const struct xnn_node* first_node = subgraph->nodes;
  for (uint32_t i = 0; i < first_node->num_inputs; ++i) {
    usage[first_node->inputs[i]].first_node = 0;
  }
  for (uint32_t i = 0; i < first_node->num_outputs; ++i) {
    usage[first_node->outputs[i]].first_node = 0;
  }
}

// Represents a memory block [start, end).
struct memory_block {
  size_t start;
  size_t end;
};

// Comparison function used to sort memory_blocks according to 'start' in increasing order.
static inline int cmp_memory_block(const void* a, const void* b) {
  const size_t start_a = ((struct memory_block*)a)->start;
  const size_t start_b = ((struct memory_block*)b)->start;
  return (start_a > start_b) - (start_a < start_b);
}

// Given the current live memory blocks, returns the offset into the memory arena at which a to-be-allocated value of
// size 'to_alloc_size' should be placed.
static size_t find_value_alloc_offset(struct memory_block* live_mem_blocks,
                                      size_t num_mem_blocks,
                                      size_t to_alloc_size) {
  if (num_mem_blocks == 0) {
    return 0;
  }

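  // With a single live block, simply place the new value right after it.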
  if (num_mem_blocks == 1) {
    return live_mem_blocks[0].end;
  }

  // Sort memory blocks according to 'start' in increasing order.
  qsort(live_mem_blocks, num_mem_blocks, sizeof(struct memory_block), cmp_memory_block);

  // Coalesce overlapping or immediately adjacent memory blocks to form a list of non-overlapping memory blocks, in
  // order to find the smallest gap.
  size_t num_coalesced_mem_blocks = 1;
  for (size_t i = 1; i < num_mem_blocks; ++i) {
    const size_t current_coalesced_end =
        live_mem_blocks[num_coalesced_mem_blocks - 1].end;
    if (live_mem_blocks[i].start > current_coalesced_end) {
      assert(num_coalesced_mem_blocks <= i);
      live_mem_blocks[num_coalesced_mem_blocks] = live_mem_blocks[i];
      num_coalesced_mem_blocks++;
      continue;
    }
    if (live_mem_blocks[i].end > current_coalesced_end) {
      live_mem_blocks[num_coalesced_mem_blocks - 1].end = live_mem_blocks[i].end;
    }
  }

  size_t smallest_gap_size = SIZE_MAX;
  // Best-fit: index of the coalesced block after whose end 'to_alloc_size' will be placed. It defaults to the last
  // block (append at the end of the arena) and is updated to the block preceding the smallest gap that fits.
  size_t smallest_gap_index = num_coalesced_mem_blocks - 1;
  for (size_t i = 0; i < num_coalesced_mem_blocks - 1; ++i) {
    assert(live_mem_blocks[i + 1].start > live_mem_blocks[i].end);
    const size_t gap = live_mem_blocks[i + 1].start - live_mem_blocks[i].end;
    if (gap >= to_alloc_size && gap < smallest_gap_size) {
      smallest_gap_index = i;
      smallest_gap_size = gap;
    }
  }
  return live_mem_blocks[smallest_gap_index].end;
}

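// Initializes the tracker for 'subgraph': allocates one zero-initialized xnn_value_usage per value and, when
// XNN_ENABLE_MEMOPT is enabled, records each value's first/last usage node.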
void xnn_init_value_allocation_tracker(struct xnn_value_allocation_tracker* tracker, const xnn_subgraph_t subgraph) {
  tracker->subgraph = subgraph;
  tracker->mem_arena_size = 0;
  tracker->usage = xnn_allocate_zero_memory(sizeof(struct xnn_value_usage) * subgraph->num_values);
#if XNN_ENABLE_MEMOPT
  populate_value_lifecycle(tracker->subgraph, tracker->usage);
#endif
  tracker->min_value_id = XNN_INVALID_VALUE_ID;
  tracker->max_value_id = XNN_INVALID_VALUE_ID;
}

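// Registers a value of 'tensor_size' bytes with the tracker. Value IDs must be added in increasing order.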
void xnn_add_value_allocation_tracker(struct xnn_value_allocation_tracker* tracker,
                                      uint32_t value_id,
                                      size_t tensor_size) {
  tracker->usage[value_id].tensor_size = tensor_size;
  if (tracker->min_value_id == XNN_INVALID_VALUE_ID) {
    tracker->min_value_id = value_id;
  } else {
    // Note that values are expected to be added in increasing order.
    assert(value_id > tracker->min_value_id);
    assert(value_id > tracker->max_value_id);
  }

  tracker->max_value_id = value_id;
}

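// Assigns an arena offset to every registered value and computes the total arena size. With XNN_ENABLE_MEMOPT,
// values are planned greedily in decreasing size order and may reuse the space of values whose lifetimes do not
// overlap; otherwise each value gets its own non-overlapping region of the arena.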
void xnn_plan_value_allocation_tracker(struct xnn_value_allocation_tracker* tracker) {
#if XNN_ENABLE_MEMOPT
  if (tracker->min_value_id == XNN_INVALID_VALUE_ID) {
    assert(tracker->max_value_id == XNN_INVALID_VALUE_ID);
    return;
  }

  const uint32_t num_values = tracker->max_value_id - tracker->min_value_id + 1;
  struct xnn_value_usage** sorted_usage = xnn_allocate_zero_memory(sizeof(struct xnn_value_usage*) * num_values);
  size_t num_values_to_alloc = 0;
  for (size_t i = tracker->min_value_id; i <= tracker->max_value_id; ++i) {
    struct xnn_value_usage* info = tracker->usage + i;
    if (info->tensor_size != 0) {
      sorted_usage[num_values_to_alloc++] = info;
    }
  }
  qsort(sorted_usage, num_values_to_alloc, sizeof(struct xnn_value_usage*), cmp_value_usage_tensor_size);

  // Start the allocation planning process.
  struct memory_block* current_live_mem_blocks = xnn_allocate_zero_memory(
      sizeof(struct memory_block) * num_values_to_alloc);
  size_t mem_arena_size = 0;
  for (size_t i = 0; i < num_values_to_alloc; ++i) {
    size_t num_live_mem_blocks = 0;
    struct xnn_value_usage* current = sorted_usage[i];
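    // Collect the memory blocks of the already-planned values whose lifetimes overlap with the current value; only
    // those blocks constrain where the current value can be placed.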
    for (size_t j = 0; j < i; ++j) {
      const struct xnn_value_usage* allocated = sorted_usage[j];
      if (value_lifecycle_overlap(current, allocated)) {
        current_live_mem_blocks[num_live_mem_blocks++] = (struct memory_block){
          .start = allocated->alloc_offset,
          .end = allocated->alloc_offset + allocated->tensor_size,
        };
      }
    }
    current->alloc_offset = find_value_alloc_offset(current_live_mem_blocks, num_live_mem_blocks, current->tensor_size);
    if (mem_arena_size < current->alloc_offset + current->tensor_size) {
      mem_arena_size = current->alloc_offset + current->tensor_size;
    }
  }

  tracker->mem_arena_size = mem_arena_size;
  xnn_release_memory(sorted_usage);
  xnn_release_memory(current_live_mem_blocks);
#else
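  // Without XNN_ENABLE_MEMOPT there is no memory reuse: values are simply laid out back-to-back in the arena.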
  tracker->mem_arena_size = 0;
  if (tracker->min_value_id == XNN_INVALID_VALUE_ID) {
    // Nothing to plan if no values were registered (mirrors the check in the XNN_ENABLE_MEMOPT path above).
    assert(tracker->max_value_id == XNN_INVALID_VALUE_ID);
    return;
  }
  for (uint32_t i = tracker->min_value_id; i <= tracker->max_value_id; ++i) {
    if (tracker->usage[i].tensor_size > 0) {
      tracker->usage[i].alloc_offset = tracker->mem_arena_size;
      tracker->mem_arena_size += tracker->usage[i].tensor_size;
    }
  }
#endif
}