Blame - mm/zbud.c - kernel/msm-4.9

blob: 9451361e6aa701557e9d028e738e9d6e3024756f [file] [log] [blame]

Seth Jennings	4e2e277	2013-07-10 16:04:55 -0700	[diff] [blame]	1	/*
				2	* zbud.c
				3	*
				4	* Copyright (C) 2013, Seth Jennings, IBM
				5	*
				6	* Concepts based on zcache internal zbud allocator by Dan Magenheimer.
				7	*
				8	* zbud is a special purpose allocator for storing compressed pages. Contrary
				9	* to what its name may suggest, zbud is not a buddy allocator, but rather an
				10	* allocator that "buddies" two compressed pages together in a single memory
				11	* page.
				12	*
				13	* While this design limits storage density, it has simple and deterministic
				14	* reclaim properties that make it preferable to a higher density approach when
				15	* reclaim will be used.
				16	*
				17	* zbud works by storing compressed pages, or "zpages", together in pairs in a
				18	* single memory page called a "zbud page". The first buddy is "left
Jianguo Wu	eee87e1	2013-09-11 14:21:42 -0700	[diff] [blame]	19	* justified" at the beginning of the zbud page, and the last buddy is "right
Seth Jennings	4e2e277	2013-07-10 16:04:55 -0700	[diff] [blame]	20	* justified" at the end of the zbud page. The benefit is that if either
				21	* buddy is freed, the freed buddy space, coalesced with whatever slack space
				22	* that existed between the buddies, results in the largest possible free region
				23	* within the zbud page.
				24	*
				25	* zbud also provides an attractive lower bound on density. The ratio of zpages
				26	* to zbud pages can not be less than 1. This ensures that zbud can never "do
				27	* harm" by using more pages to store zpages than the uncompressed zpages would
				28	* have used on their own.
				29	*
				30	* zbud pages are divided into "chunks". The size of the chunks is fixed at
				31	* compile time and determined by NCHUNKS_ORDER below. Dividing zbud pages
				32	* into chunks allows organizing unbuddied zbud pages into a manageable number
				33	* of unbuddied lists according to the number of free chunks available in the
				34	* zbud page.
				35	*
				36	* The zbud API differs from that of conventional allocators in that the
				37	* allocation function, zbud_alloc(), returns an opaque handle to the user,
				38	* not a dereferenceable pointer. The user must map the handle using
				39	* zbud_map() in order to get a usable pointer by which to access the
				40	* allocation data and unmap the handle with zbud_unmap() when operations
				41	* on the allocation data are complete.
				42	*/
				43
				44	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
				45
				46	#include <linux/atomic.h>
				47	#include <linux/list.h>
				48	#include <linux/mm.h>
				49	#include <linux/module.h>
				50	#include <linux/preempt.h>
				51	#include <linux/slab.h>
				52	#include <linux/spinlock.h>
				53	#include <linux/zbud.h>
				54
				55	/*****************
				56	* Structures
				57	*****************/
				58	/*
				59	* NCHUNKS_ORDER determines the internal allocation granularity, effectively
				60	* adjusting internal fragmentation. It also determines the number of
				61	* freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the
				62	* allocation granularity will be in chunks of size PAGE_SIZE/64, and there
				63	* will be 64 freelists per pool.
				64	*/
				65	#define NCHUNKS_ORDER 6
				66
				67	#define CHUNK_SHIFT (PAGE_SHIFT - NCHUNKS_ORDER)
				68	#define CHUNK_SIZE (1 << CHUNK_SHIFT)
				69	#define NCHUNKS (PAGE_SIZE >> CHUNK_SHIFT)
				70	#define ZHDR_SIZE_ALIGNED CHUNK_SIZE
				71
				72	/**
				73	* struct zbud_pool - stores metadata for each zbud pool
				74	* @lock: protects all pool fields and first\|last_chunk fields of any
				75	* zbud page in the pool
				76	* @unbuddied: array of lists tracking zbud pages that only contain one buddy;
				77	* the lists each zbud page is added to depends on the size of
				78	* its free region.
				79	* @buddied: list tracking the zbud pages that contain two buddies;
				80	* these zbud pages are full
				81	* @lru: list tracking the zbud pages in LRU order by most recently
				82	* added buddy.
				83	* @pages_nr: number of zbud pages in the pool.
				84	* @ops: pointer to a structure of user defined operations specified at
				85	* pool creation time.
				86	*
				87	* This structure is allocated at pool creation time and maintains metadata
				88	* pertaining to a particular zbud pool.
				89	*/
				90	struct zbud_pool {
				91	spinlock_t lock;
				92	struct list_head unbuddied[NCHUNKS];
				93	struct list_head buddied;
				94	struct list_head lru;
				95	u64 pages_nr;
				96	struct zbud_ops *ops;
				97	};
				98
				99	/*
				100	* struct zbud_header - zbud page metadata occupying the first chunk of each
				101	* zbud page.
				102	* @buddy: links the zbud page into the unbuddied/buddied lists in the pool
				103	* @lru: links the zbud page into the lru list in the pool
				104	* @first_chunks: the size of the first buddy in chunks, 0 if free
				105	* @last_chunks: the size of the last buddy in chunks, 0 if free
				106	*/
				107	struct zbud_header {
				108	struct list_head buddy;
				109	struct list_head lru;
				110	unsigned int first_chunks;
				111	unsigned int last_chunks;
				112	bool under_reclaim;
				113	};
				114
				115	/*****************
				116	* Helpers
				117	*****************/
				118	/* Just to make the code easier to read */
				119	enum buddy {
				120	FIRST,
				121	LAST
				122	};
				123
				124	/* Converts an allocation size in bytes to size in zbud chunks */
				125	static int size_to_chunks(int size)
				126	{
				127	return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
				128	}
				129
				130	#define for_each_unbuddied_list(_iter, _begin) \
				131	for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)
				132
				133	/* Initializes the zbud header of a newly allocated zbud page */
				134	static struct zbud_header init_zbud_page(struct page page)
				135	{
				136	struct zbud_header *zhdr = page_address(page);
				137	zhdr->first_chunks = 0;
				138	zhdr->last_chunks = 0;
				139	INIT_LIST_HEAD(&zhdr->buddy);
				140	INIT_LIST_HEAD(&zhdr->lru);
				141	zhdr->under_reclaim = 0;
				142	return zhdr;
				143	}
				144
				145	/* Resets the struct page fields and frees the page */
				146	static void free_zbud_page(struct zbud_header *zhdr)
				147	{
				148	__free_page(virt_to_page(zhdr));
				149	}
				150
				151	/*
				152	* Encodes the handle of a particular buddy within a zbud page
				153	* Pool lock should be held as this function accesses first\|last_chunks
				154	*/
				155	static unsigned long encode_handle(struct zbud_header *zhdr, enum buddy bud)
				156	{
				157	unsigned long handle;
				158
				159	/*
				160	* For now, the encoded handle is actually just the pointer to the data
				161	* but this might not always be the case. A little information hiding.
				162	* Add CHUNK_SIZE to the handle if it is the first allocation to jump
				163	* over the zbud header in the first chunk.
				164	*/
				165	handle = (unsigned long)zhdr;
				166	if (bud == FIRST)
				167	/* skip over zbud header */
				168	handle += ZHDR_SIZE_ALIGNED;
				169	else /* bud == LAST */
				170	handle += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT);
				171	return handle;
				172	}
				173
				174	/* Returns the zbud page where a given handle is stored */
				175	static struct zbud_header *handle_to_zbud_header(unsigned long handle)
				176	{
				177	return (struct zbud_header *)(handle & PAGE_MASK);
				178	}
				179
				180	/* Returns the number of free chunks in a zbud page */
				181	static int num_free_chunks(struct zbud_header *zhdr)
				182	{
				183	/*
				184	* Rather than branch for different situations, just use the fact that
				185	* free buddies have a length of zero to simplify everything. -1 at the
				186	* end for the zbud header.
				187	*/
				188	return NCHUNKS - zhdr->first_chunks - zhdr->last_chunks - 1;
				189	}
				190
				191	/*****************
				192	* API Functions
				193	*****************/
				194	/**
				195	* zbud_create_pool() - create a new zbud pool
				196	* @gfp: gfp flags when allocating the zbud pool structure
				197	* @ops: user-defined operations for the zbud pool
				198	*
				199	* Return: pointer to the new zbud pool or NULL if the metadata allocation
				200	* failed.
				201	*/
				202	struct zbud_pool zbud_create_pool(gfp_t gfp, struct zbud_ops ops)
				203	{
				204	struct zbud_pool *pool;
				205	int i;
				206
				207	pool = kmalloc(sizeof(struct zbud_pool), gfp);
				208	if (!pool)
				209	return NULL;
				210	spin_lock_init(&pool->lock);
				211	for_each_unbuddied_list(i, 0)
				212	INIT_LIST_HEAD(&pool->unbuddied[i]);
				213	INIT_LIST_HEAD(&pool->buddied);
				214	INIT_LIST_HEAD(&pool->lru);
				215	pool->pages_nr = 0;
				216	pool->ops = ops;
				217	return pool;
				218	}
				219
				220	/**
				221	* zbud_destroy_pool() - destroys an existing zbud pool
				222	* @pool: the zbud pool to be destroyed
				223	*
				224	* The pool should be emptied before this function is called.
				225	*/
				226	void zbud_destroy_pool(struct zbud_pool *pool)
				227	{
				228	kfree(pool);
				229	}
				230
				231	/**
				232	* zbud_alloc() - allocates a region of a given size
				233	* @pool: zbud pool from which to allocate
				234	* @size: size in bytes of the desired allocation
				235	* @gfp: gfp flags used if the pool needs to grow
				236	* @handle: handle of the new allocation
				237	*
				238	* This function will attempt to find a free region in the pool large enough to
				239	* satisfy the allocation request. A search of the unbuddied lists is
				240	* performed first. If no suitable free region is found, then a new page is
				241	* allocated and added to the pool to satisfy the request.
				242	*
				243	* gfp should not set __GFP_HIGHMEM as highmem pages cannot be used
				244	* as zbud pool pages.
				245	*
Jianguo Wu	eee87e1	2013-09-11 14:21:42 -0700	[diff] [blame]	246	* Return: 0 if success and handle is set, otherwise -EINVAL if the size or
Seth Jennings	4e2e277	2013-07-10 16:04:55 -0700	[diff] [blame]	247	* gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
				248	* a new page.
				249	*/
				250	int zbud_alloc(struct zbud_pool *pool, int size, gfp_t gfp,
				251	unsigned long *handle)
				252	{
				253	int chunks, i, freechunks;
				254	struct zbud_header *zhdr = NULL;
				255	enum buddy bud;
				256	struct page *page;
				257
				258	if (size <= 0 \|\| gfp & __GFP_HIGHMEM)
				259	return -EINVAL;
Heesub Shin	9d8c5b5	2013-07-31 13:53:40 -0700	[diff] [blame]	260	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
Seth Jennings	4e2e277	2013-07-10 16:04:55 -0700	[diff] [blame]	261	return -ENOSPC;
				262	chunks = size_to_chunks(size);
				263	spin_lock(&pool->lock);
				264
				265	/* First, try to find an unbuddied zbud page. */
				266	zhdr = NULL;
				267	for_each_unbuddied_list(i, chunks) {
				268	if (!list_empty(&pool->unbuddied[i])) {
				269	zhdr = list_first_entry(&pool->unbuddied[i],
				270	struct zbud_header, buddy);
				271	list_del(&zhdr->buddy);
				272	if (zhdr->first_chunks == 0)
				273	bud = FIRST;
				274	else
				275	bud = LAST;
				276	goto found;
				277	}
				278	}
				279
				280	/* Couldn't find unbuddied zbud page, create new one */
				281	spin_unlock(&pool->lock);
				282	page = alloc_page(gfp);
				283	if (!page)
				284	return -ENOMEM;
				285	spin_lock(&pool->lock);
				286	pool->pages_nr++;
				287	zhdr = init_zbud_page(page);
				288	bud = FIRST;
				289
				290	found:
				291	if (bud == FIRST)
				292	zhdr->first_chunks = chunks;
				293	else
				294	zhdr->last_chunks = chunks;
				295
				296	if (zhdr->first_chunks == 0 \|\| zhdr->last_chunks == 0) {
				297	/* Add to unbuddied list */
				298	freechunks = num_free_chunks(zhdr);
				299	list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
				300	} else {
				301	/* Add to buddied list */
				302	list_add(&zhdr->buddy, &pool->buddied);
				303	}
				304
				305	/* Add/move zbud page to beginning of LRU */
				306	if (!list_empty(&zhdr->lru))
				307	list_del(&zhdr->lru);
				308	list_add(&zhdr->lru, &pool->lru);
				309
				310	*handle = encode_handle(zhdr, bud);
				311	spin_unlock(&pool->lock);
				312
				313	return 0;
				314	}
				315
				316	/**
				317	* zbud_free() - frees the allocation associated with the given handle
				318	* @pool: pool in which the allocation resided
				319	* @handle: handle associated with the allocation returned by zbud_alloc()
				320	*
				321	* In the case that the zbud page in which the allocation resides is under
				322	* reclaim, as indicated by the PG_reclaim flag being set, this function
				323	* only sets the first\|last_chunks to 0. The page is actually freed
				324	* once both buddies are evicted (see zbud_reclaim_page() below).
				325	*/
				326	void zbud_free(struct zbud_pool *pool, unsigned long handle)
				327	{
				328	struct zbud_header *zhdr;
				329	int freechunks;
				330
				331	spin_lock(&pool->lock);
				332	zhdr = handle_to_zbud_header(handle);
				333
				334	/* If first buddy, handle will be page aligned */
				335	if ((handle - ZHDR_SIZE_ALIGNED) & ~PAGE_MASK)
				336	zhdr->last_chunks = 0;
				337	else
				338	zhdr->first_chunks = 0;
				339
				340	if (zhdr->under_reclaim) {
				341	/* zbud page is under reclaim, reclaim will free */
				342	spin_unlock(&pool->lock);
				343	return;
				344	}
				345
				346	/* Remove from existing buddy list */
				347	list_del(&zhdr->buddy);
				348
				349	if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
				350	/* zbud page is empty, free */
				351	list_del(&zhdr->lru);
				352	free_zbud_page(zhdr);
				353	pool->pages_nr--;
				354	} else {
				355	/* Add to unbuddied list */
				356	freechunks = num_free_chunks(zhdr);
				357	list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
				358	}
				359
				360	spin_unlock(&pool->lock);
				361	}
				362
				363	#define list_tail_entry(ptr, type, member) \
				364	list_entry((ptr)->prev, type, member)
				365
				366	/**
				367	* zbud_reclaim_page() - evicts allocations from a pool page and frees it
				368	* @pool: pool from which a page will attempt to be evicted
				369	* @retires: number of pages on the LRU list for which eviction will
				370	* be attempted before failing
				371	*
				372	* zbud reclaim is different from normal system reclaim in that the reclaim is
				373	* done from the bottom, up. This is because only the bottom layer, zbud, has
				374	* information on how the allocations are organized within each zbud page. This
				375	* has the potential to create interesting locking situations between zbud and
				376	* the user, however.
				377	*
				378	* To avoid these, this is how zbud_reclaim_page() should be called:
				379
				380	* The user detects a page should be reclaimed and calls zbud_reclaim_page().
				381	* zbud_reclaim_page() will remove a zbud page from the pool LRU list and call
				382	* the user-defined eviction handler with the pool and handle as arguments.
				383	*
				384	* If the handle can not be evicted, the eviction handler should return
				385	* non-zero. zbud_reclaim_page() will add the zbud page back to the
				386	* appropriate list and try the next zbud page on the LRU up to
				387	* a user defined number of retries.
				388	*
				389	* If the handle is successfully evicted, the eviction handler should
				390	* return 0 _and_ should have called zbud_free() on the handle. zbud_free()
				391	* contains logic to delay freeing the page if the page is under reclaim,
				392	* as indicated by the setting of the PG_reclaim flag on the underlying page.
				393	*
				394	* If all buddies in the zbud page are successfully evicted, then the
				395	* zbud page can be freed.
				396	*
				397	* Returns: 0 if page is successfully freed, otherwise -EINVAL if there are
				398	* no pages to evict or an eviction handler is not registered, -EAGAIN if
				399	* the retry limit was hit.
				400	*/
				401	int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
				402	{
				403	int i, ret, freechunks;
				404	struct zbud_header *zhdr;
				405	unsigned long first_handle = 0, last_handle = 0;
				406
				407	spin_lock(&pool->lock);
				408	if (!pool->ops \|\| !pool->ops->evict \|\| list_empty(&pool->lru) \|\|
				409	retries == 0) {
				410	spin_unlock(&pool->lock);
				411	return -EINVAL;
				412	}
				413	for (i = 0; i < retries; i++) {
				414	zhdr = list_tail_entry(&pool->lru, struct zbud_header, lru);
				415	list_del(&zhdr->lru);
				416	list_del(&zhdr->buddy);
				417	/* Protect zbud page against free */
				418	zhdr->under_reclaim = true;
				419	/*
				420	* We need encode the handles before unlocking, since we can
				421	* race with free that will set (first\|last)_chunks to 0
				422	*/
				423	first_handle = 0;
				424	last_handle = 0;
				425	if (zhdr->first_chunks)
				426	first_handle = encode_handle(zhdr, FIRST);
				427	if (zhdr->last_chunks)
				428	last_handle = encode_handle(zhdr, LAST);
				429	spin_unlock(&pool->lock);
				430
				431	/* Issue the eviction callback(s) */
				432	if (first_handle) {
				433	ret = pool->ops->evict(pool, first_handle);
				434	if (ret)
				435	goto next;
				436	}
				437	if (last_handle) {
				438	ret = pool->ops->evict(pool, last_handle);
				439	if (ret)
				440	goto next;
				441	}
				442	next:
				443	spin_lock(&pool->lock);
				444	zhdr->under_reclaim = false;
				445	if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
				446	/*
				447	* Both buddies are now free, free the zbud page and
				448	* return success.
				449	*/
				450	free_zbud_page(zhdr);
				451	pool->pages_nr--;
				452	spin_unlock(&pool->lock);
				453	return 0;
				454	} else if (zhdr->first_chunks == 0 \|\|
				455	zhdr->last_chunks == 0) {
				456	/* add to unbuddied list */
				457	freechunks = num_free_chunks(zhdr);
				458	list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
				459	} else {
				460	/* add to buddied list */
				461	list_add(&zhdr->buddy, &pool->buddied);
				462	}
				463
				464	/* add to beginning of LRU */
				465	list_add(&zhdr->lru, &pool->lru);
				466	}
				467	spin_unlock(&pool->lock);
				468	return -EAGAIN;
				469	}
				470
				471	/**
				472	* zbud_map() - maps the allocation associated with the given handle
				473	* @pool: pool in which the allocation resides
				474	* @handle: handle associated with the allocation to be mapped
				475	*
				476	* While trivial for zbud, the mapping functions for others allocators
				477	* implementing this allocation API could have more complex information encoded
				478	* in the handle and could create temporary mappings to make the data
				479	* accessible to the user.
				480	*
				481	* Returns: a pointer to the mapped allocation
				482	*/
				483	void zbud_map(struct zbud_pool pool, unsigned long handle)
				484	{
				485	return (void *)(handle);
				486	}
				487
				488	/**
				489	* zbud_unmap() - maps the allocation associated with the given handle
				490	* @pool: pool in which the allocation resides
				491	* @handle: handle associated with the allocation to be unmapped
				492	*/
				493	void zbud_unmap(struct zbud_pool *pool, unsigned long handle)
				494	{
				495	}
				496
				497	/**
				498	* zbud_get_pool_size() - gets the zbud pool size in pages
				499	* @pool: pool whose size is being queried
				500	*
				501	* Returns: size in pages of the given pool. The pool lock need not be
				502	* taken to access pages_nr.
				503	*/
				504	u64 zbud_get_pool_size(struct zbud_pool *pool)
				505	{
				506	return pool->pages_nr;
				507	}
				508
				509	static int __init init_zbud(void)
				510	{
				511	/* Make sure the zbud header will fit in one chunk */
				512	BUILD_BUG_ON(sizeof(struct zbud_header) > ZHDR_SIZE_ALIGNED);
				513	pr_info("loaded\n");
				514	return 0;
				515	}
				516
				517	static void __exit exit_zbud(void)
				518	{
				519	pr_info("unloaded\n");
				520	}
				521
				522	module_init(init_zbud);
				523	module_exit(exit_zbud);
				524
				525	MODULE_LICENSE("GPL");
				526	MODULE_AUTHOR("Seth Jennings <sjenning@linux.vnet.ibm.com>");
				527	MODULE_DESCRIPTION("Buddy Allocator for Compressed Pages");