Blame - mm/zsmalloc.c - kernel/msm-4.9

blob: 0dec1fa5f6565dab6be9c9515d52c9caae8e0ea6 [file] [log] [blame]

Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1	/*
				2	* zsmalloc memory allocator
				3	*
				4	* Copyright (C) 2011 Nitin Gupta
Minchan Kim	31fc00b	2014-01-30 15:45:55 -0800	[diff] [blame]	5	* Copyright (C) 2012, 2013 Minchan Kim
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	6	*
				7	* This code is released using a dual license strategy: BSD/GPL
				8	* You can choose the license that better fits your requirements.
				9	*
				10	* Released under the terms of 3-clause BSD License
				11	* Released under the terms of GNU General Public License Version 2.0
				12	*/
				13
Nitin Gupta	2db51da	2012-06-09 17:41:14 -0700	[diff] [blame]	14	/*
Nitin Cupta	c3e3e88	2013-12-11 11:04:37 +0900	[diff] [blame]	15	* This allocator is designed for use with zram. Thus, the allocator is
				16	* supposed to work well under low memory conditions. In particular, it
				17	* never attempts higher order page allocation which is very likely to
				18	* fail under memory pressure. On the other hand, if we just use single
				19	* (0-order) pages, it would suffer from very high fragmentation --
				20	* any object of size PAGE_SIZE/2 or larger would occupy an entire page.
				21	* This was one of the major issues with its predecessor (xvmalloc).
Nitin Gupta	2db51da	2012-06-09 17:41:14 -0700	[diff] [blame]	22	*
				23	* To overcome these issues, zsmalloc allocates a bunch of 0-order pages
				24	* and links them together using various 'struct page' fields. These linked
				25	* pages act as a single higher-order page i.e. an object can span 0-order
				26	* page boundaries. The code refers to these linked pages as a single entity
				27	* called zspage.
				28	*
Nitin Cupta	c3e3e88	2013-12-11 11:04:37 +0900	[diff] [blame]	29	* For simplicity, zsmalloc can only allocate objects of size up to PAGE_SIZE
				30	* since this satisfies the requirements of all its current users (in the
				31	* worst case, page is incompressible and is thus stored "as-is" i.e. in
				32	* uncompressed form). For allocation requests larger than this size, failure
				33	* is returned (see zs_malloc).
				34	*
				35	* Additionally, zs_malloc() does not return a dereferenceable pointer.
				36	* Instead, it returns an opaque handle (unsigned long) which encodes actual
				37	* location of the allocated object. The reason for this indirection is that
				38	* zsmalloc does not keep zspages permanently mapped since that would cause
				39	* issues on 32-bit systems where the VA region for kernel space mappings
				40	* is very small. So, before using the allocated memory, the object has to
				41	* be mapped using zs_map_object() to get a usable pointer and subsequently
				42	* unmapped using zs_unmap_object().
				43	*
Nitin Gupta	2db51da	2012-06-09 17:41:14 -0700	[diff] [blame]	44	* Following is how we use various fields and flags of underlying
				45	* struct page(s) to form a zspage.
				46	*
				47	* Usage of struct page fields:
				48	* page->first_page: points to the first component (0-order) page
				49	* page->index (union with page->freelist): offset of the first object
				50	* starting in this page. For the first page, this is
				51	* always 0, so we use this field (aka freelist) to point
				52	* to the first free object in zspage.
				53	* page->lru: links together all component pages (except the first page)
				54	* of a zspage
				55	*
				56	* For _first_ page only:
				57	*
				58	* page->private (union with page->first_page): refers to the
				59	* component page after the first page
				60	* page->freelist: points to the first free object in zspage.
				61	* Free objects are linked together using in-place
				62	* metadata.
				63	* page->objects: maximum number of objects we can store in this
				64	* zspage (class->pages_per_zspage * PAGE_SIZE / class->size)
				65	* page->lru: links together first pages of various zspages.
				66	* Basically forming list of zspages in a fullness group.
				67	* page->mapping: class index and fullness group of the zspage
				68	*
				69	* Usage of struct page flags:
				70	* PG_private: identifies the first component page
				71	* PG_private2: identifies the last component page
				72	*
				73	*/
				74
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	75	#ifdef CONFIG_ZSMALLOC_DEBUG
				76	#define DEBUG
				77	#endif
				78
				79	#include <linux/module.h>
				80	#include <linux/kernel.h>
				81	#include <linux/bitops.h>
				82	#include <linux/errno.h>
				83	#include <linux/highmem.h>
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	84	#include <linux/string.h>
				85	#include <linux/slab.h>
				86	#include <asm/tlbflush.h>
				87	#include <asm/pgtable.h>
				88	#include <linux/cpumask.h>
				89	#include <linux/cpu.h>
Seth Jennings	0cbb613	2012-02-13 08:47:49 -0600	[diff] [blame]	90	#include <linux/vmalloc.h>
Seth Jennings	c60369f	2012-07-18 11:55:55 -0500	[diff] [blame]	91	#include <linux/hardirq.h>
Seth Jennings	0959c63	2012-08-08 15:12:17 +0900	[diff] [blame]	92	#include <linux/spinlock.h>
				93	#include <linux/types.h>
Ganesh Mahendran	0f050d9	2015-02-12 15:00:54 -0800	[diff] [blame]	94	#include <linux/debugfs.h>
Minchan Kim	bcf1647	2014-01-30 15:45:50 -0800	[diff] [blame]	95	#include <linux/zsmalloc.h>
Dan Streetman	c795779	2014-08-06 16:08:38 -0700	[diff] [blame]	96	#include <linux/zpool.h>
Seth Jennings	0959c63	2012-08-08 15:12:17 +0900	[diff] [blame]	97
				98	/*
				99	* This must be power of 2 and greater than or equal to sizeof(link_free).
				100	* These two conditions ensure that any 'struct link_free' itself doesn't
				101	* span more than 1 page which avoids complex case of mapping 2 pages simply
				102	* to restore link_free pointer values.
				103	*/
				104	#define ZS_ALIGN 8
				105
				106	/*
				107	* A single 'zspage' is composed of up to 2^N discontiguous 0-order (single)
				108	* pages. ZS_MAX_ZSPAGE_ORDER defines upper limit on N.
				109	*/
				110	#define ZS_MAX_ZSPAGE_ORDER 2
				111	#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)
				112
				113	/*
				114	* Object location (<PFN>, <obj_idx>) is encoded as
Nitin Cupta	c3e3e88	2013-12-11 11:04:37 +0900	[diff] [blame]	115	* a single (unsigned long) handle value.
Seth Jennings	0959c63	2012-08-08 15:12:17 +0900	[diff] [blame]	116	*
				117	* Note that object index <obj_idx> is relative to system
				118	* page <PFN> it is stored in, so for each sub-page belonging
				119	* to a zspage, obj_idx starts with 0.
				120	*
				121	* This is made more complicated by various memory models and PAE.
				122	*/
				123
				124	#ifndef MAX_PHYSMEM_BITS
				125	#ifdef CONFIG_HIGHMEM64G
				126	#define MAX_PHYSMEM_BITS 36
				127	#else /* !CONFIG_HIGHMEM64G */
				128	/*
				129	* If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
				130	* be PAGE_SHIFT
				131	*/
				132	#define MAX_PHYSMEM_BITS BITS_PER_LONG
				133	#endif
				134	#endif
				135	#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
				136	#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS)
				137	#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
				138
				139	#define MAX(a, b) ((a) >= (b) ? (a) : (b))
				140	/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
				141	#define ZS_MIN_ALLOC_SIZE \
				142	MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
				143	#define ZS_MAX_ALLOC_SIZE PAGE_SIZE
				144
				145	/*
Weijie Yang	7eb5251	2014-06-04 16:11:08 -0700	[diff] [blame]	146	* On systems with 4K page size, this gives 255 size classes! There is a
Seth Jennings	0959c63	2012-08-08 15:12:17 +0900	[diff] [blame]	147	* trade-off here:
				148	* - Large number of size classes is potentially wasteful as free page are
				149	* spread across these classes
				150	* - Small number of size classes causes large internal fragmentation
				151	* - Probably it's better to use specific size classes (empirically
				152	* determined). NOTE: all those class sizes must be set as multiple of
				153	* ZS_ALIGN to make sure link_free itself never has to span 2 pages.
				154	*
				155	* ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiple of ZS_ALIGN
				156	* (reason above)
				157	*/
Seth Jennings	d662b8e	2013-01-25 11:46:18 -0600	[diff] [blame]	158	#define ZS_SIZE_CLASS_DELTA (PAGE_SIZE >> 8)
Seth Jennings	0959c63	2012-08-08 15:12:17 +0900	[diff] [blame]	159
				160	/*
				161	* We do not maintain any list for completely empty or full pages
				162	*/
				163	enum fullness_group {
				164	ZS_ALMOST_FULL,
				165	ZS_ALMOST_EMPTY,
				166	_ZS_NR_FULLNESS_GROUPS,
				167
				168	ZS_EMPTY,
				169	ZS_FULL
				170	};
				171
/*
 * Indices into zs_size_stat.objs[]; NR_ZS_STAT_TYPE is the number of
 * counters, not a counter itself.
 */
enum zs_stat_type {
	OBJ_ALLOCATED,
	OBJ_USED,
	NR_ZS_STAT_TYPE,
};
				177
				178	#ifdef CONFIG_ZSMALLOC_STAT
				179
				180	static struct dentry *zs_stat_root;
				181
				182	struct zs_size_stat {
				183	unsigned long objs[NR_ZS_STAT_TYPE];
				184	};
				185
				186	#endif
				187
Seth Jennings	0959c63	2012-08-08 15:12:17 +0900	[diff] [blame]	188	/*
Mahendran Ganesh	40f9fb8	2014-12-12 16:57:01 -0800	[diff] [blame]	189	* number of size_classes
				190	*/
				191	static int zs_size_classes;
				192
				193	/*
Seth Jennings	0959c63	2012-08-08 15:12:17 +0900	[diff] [blame]	194	* We assign a page to ZS_ALMOST_EMPTY fullness group when:
				195	* n <= N / f, where
				196	* n = number of allocated objects
				197	* N = total number of objects zspage can store
Wang Sheng-Hui	6dd9737	2014-10-09 15:29:59 -0700	[diff] [blame]	198	* f = fullness_threshold_frac
Seth Jennings	0959c63	2012-08-08 15:12:17 +0900	[diff] [blame]	199	*
				200	* Similarly, we assign zspage to:
				201	* ZS_ALMOST_FULL when n > N / f
				202	* ZS_EMPTY when n == 0
				203	* ZS_FULL when n == N
				204	*
				205	* (see: fix_fullness_group())
				206	*/
				207	static const int fullness_threshold_frac = 4;
				208
				209	struct size_class {
				210	/*
				211	* Size of objects stored in this class. Must be multiple
				212	* of ZS_ALIGN.
				213	*/
				214	int size;
				215	unsigned int index;
				216
				217	/* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */
				218	int pages_per_zspage;
				219
Ganesh Mahendran	0f050d9	2015-02-12 15:00:54 -0800	[diff] [blame]	220	#ifdef CONFIG_ZSMALLOC_STAT
				221	struct zs_size_stat stats;
				222	#endif
				223
Seth Jennings	0959c63	2012-08-08 15:12:17 +0900	[diff] [blame]	224	spinlock_t lock;
				225
Seth Jennings	0959c63	2012-08-08 15:12:17 +0900	[diff] [blame]	226	struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS];
				227	};
				228
				229	/*
				230	* Placed within free objects to form a singly linked list.
				231	* For every zspage, first_page->freelist gives head of this list.
				232	*
				233	* This must be power of 2 and less than or equal to ZS_ALIGN
				234	*/
				235	struct link_free {
				236	/* Handle of next free chunk (encodes <PFN, obj_idx>) */
				237	void *next;
				238	};
				239
				240	struct zs_pool {
Ganesh Mahendran	0f050d9	2015-02-12 15:00:54 -0800	[diff] [blame]	241	char *name;
				242
Mahendran Ganesh	40f9fb8	2014-12-12 16:57:01 -0800	[diff] [blame]	243	struct size_class **size_class;
Seth Jennings	0959c63	2012-08-08 15:12:17 +0900	[diff] [blame]	244
				245	gfp_t flags; /* allocation flags used when growing pool */
Minchan Kim	13de893	2014-10-09 15:29:48 -0700	[diff] [blame]	246	atomic_long_t pages_allocated;
Ganesh Mahendran	0f050d9	2015-02-12 15:00:54 -0800	[diff] [blame]	247
				248	#ifdef CONFIG_ZSMALLOC_STAT
				249	struct dentry *stat_dentry;
				250	#endif
Seth Jennings	0959c63	2012-08-08 15:12:17 +0900	[diff] [blame]	251	};
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	252
				253	/*
				254	* A zspage's class index and fullness group
				255	* are encoded in its (first)page->mapping
				256	*/
				257	#define CLASS_IDX_BITS 28
				258	#define FULLNESS_BITS 4
				259	#define CLASS_IDX_MASK ((1 << CLASS_IDX_BITS) - 1)
				260	#define FULLNESS_MASK ((1 << FULLNESS_BITS) - 1)
				261
Seth Jennings	f553646	2012-07-18 11:55:56 -0500	[diff] [blame]	262	struct mapping_area {
Minchan Kim	1b945ae	2013-12-11 11:04:36 +0900	[diff] [blame]	263	#ifdef CONFIG_PGTABLE_MAPPING
Seth Jennings	f553646	2012-07-18 11:55:56 -0500	[diff] [blame]	264	struct vm_struct vm; / vm area for mapping object that span pages */
				265	#else
				266	char vm_buf; / copy buffer for objects that span pages */
				267	#endif
				268	char vm_addr; / address of kmap_atomic()'ed pages */
				269	enum zs_mapmode vm_mm; /* mapping mode */
				270	};
				271
Dan Streetman	c795779	2014-08-06 16:08:38 -0700	[diff] [blame]	272	/* zpool driver */
				273
				274	#ifdef CONFIG_ZPOOL
				275
Ganesh Mahendran	3eba0c6	2015-02-12 15:00:51 -0800	[diff] [blame]	276	static void zs_zpool_create(char name, gfp_t gfp, struct zpool_ops *zpool_ops)
Dan Streetman	c795779	2014-08-06 16:08:38 -0700	[diff] [blame]	277	{
Ganesh Mahendran	3eba0c6	2015-02-12 15:00:51 -0800	[diff] [blame]	278	return zs_create_pool(name, gfp);
Dan Streetman	c795779	2014-08-06 16:08:38 -0700	[diff] [blame]	279	}
				280
				281	static void zs_zpool_destroy(void *pool)
				282	{
				283	zs_destroy_pool(pool);
				284	}
				285
				286	static int zs_zpool_malloc(void *pool, size_t size, gfp_t gfp,
				287	unsigned long *handle)
				288	{
				289	*handle = zs_malloc(pool, size);
				290	return *handle ? 0 : -1;
				291	}
				292	static void zs_zpool_free(void *pool, unsigned long handle)
				293	{
				294	zs_free(pool, handle);
				295	}
				296
				297	static int zs_zpool_shrink(void *pool, unsigned int pages,
				298	unsigned int *reclaimed)
				299	{
				300	return -EINVAL;
				301	}
				302
				303	static void zs_zpool_map(void pool, unsigned long handle,
				304	enum zpool_mapmode mm)
				305	{
				306	enum zs_mapmode zs_mm;
				307
				308	switch (mm) {
				309	case ZPOOL_MM_RO:
				310	zs_mm = ZS_MM_RO;
				311	break;
				312	case ZPOOL_MM_WO:
				313	zs_mm = ZS_MM_WO;
				314	break;
				315	case ZPOOL_MM_RW: /* fallthru */
				316	default:
				317	zs_mm = ZS_MM_RW;
				318	break;
				319	}
				320
				321	return zs_map_object(pool, handle, zs_mm);
				322	}
				323	static void zs_zpool_unmap(void *pool, unsigned long handle)
				324	{
				325	zs_unmap_object(pool, handle);
				326	}
				327
				328	static u64 zs_zpool_total_size(void *pool)
				329	{
Minchan Kim	722cdc1	2014-10-09 15:29:50 -0700	[diff] [blame]	330	return zs_get_total_pages(pool) << PAGE_SHIFT;
Dan Streetman	c795779	2014-08-06 16:08:38 -0700	[diff] [blame]	331	}
				332
				333	static struct zpool_driver zs_zpool_driver = {
				334	.type = "zsmalloc",
				335	.owner = THIS_MODULE,
				336	.create = zs_zpool_create,
				337	.destroy = zs_zpool_destroy,
				338	.malloc = zs_zpool_malloc,
				339	.free = zs_zpool_free,
				340	.shrink = zs_zpool_shrink,
				341	.map = zs_zpool_map,
				342	.unmap = zs_zpool_unmap,
				343	.total_size = zs_zpool_total_size,
				344	};
				345
Kees Cook	137f8cf	2014-08-29 15:18:40 -0700	[diff] [blame]	346	MODULE_ALIAS("zpool-zsmalloc");
Dan Streetman	c795779	2014-08-06 16:08:38 -0700	[diff] [blame]	347	#endif /* CONFIG_ZPOOL */
				348
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	349	/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
				350	static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
				351
				352	static int is_first_page(struct page *page)
				353	{
Minchan Kim	a27545bf	2012-04-25 15:23:09 +0900	[diff] [blame]	354	return PagePrivate(page);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	355	}
				356
				357	static int is_last_page(struct page *page)
				358	{
Minchan Kim	a27545bf	2012-04-25 15:23:09 +0900	[diff] [blame]	359	return PagePrivate2(page);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	360	}
				361
				362	static void get_zspage_mapping(struct page page, unsigned int class_idx,
				363	enum fullness_group *fullness)
				364	{
				365	unsigned long m;
				366	BUG_ON(!is_first_page(page));
				367
				368	m = (unsigned long)page->mapping;
				369	*fullness = m & FULLNESS_MASK;
				370	*class_idx = (m >> FULLNESS_BITS) & CLASS_IDX_MASK;
				371	}
				372
				373	static void set_zspage_mapping(struct page *page, unsigned int class_idx,
				374	enum fullness_group fullness)
				375	{
				376	unsigned long m;
				377	BUG_ON(!is_first_page(page));
				378
				379	m = ((class_idx & CLASS_IDX_MASK) << FULLNESS_BITS) \|
				380	(fullness & FULLNESS_MASK);
				381	page->mapping = (struct address_space *)m;
				382	}
				383
Nitin Cupta	c3e3e88	2013-12-11 11:04:37 +0900	[diff] [blame]	384	/*
				385	* zsmalloc divides the pool into various size classes where each
				386	* class maintains a list of zspages where each zspage is divided
				387	* into equal sized chunks. Each allocation falls into one of these
				388	* classes depending on its size. This function returns index of the
				389	* size class which has chunk size big enough to hold the give size.
				390	*/
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	391	static int get_size_class_index(int size)
				392	{
				393	int idx = 0;
				394
				395	if (likely(size > ZS_MIN_ALLOC_SIZE))
				396	idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE,
				397	ZS_SIZE_CLASS_DELTA);
				398
				399	return idx;
				400	}
				401
Nitin Cupta	c3e3e88	2013-12-11 11:04:37 +0900	[diff] [blame]	402	/*
				403	* For each size class, zspages are divided into different groups
				404	* depending on how "full" they are. This was done so that we could
				405	* easily find empty or nearly empty zspages when we try to shrink
				406	* the pool (not yet implemented). This function returns fullness
				407	* status of the given page.
				408	*/
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	409	static enum fullness_group get_fullness_group(struct page *page)
				410	{
				411	int inuse, max_objects;
				412	enum fullness_group fg;
				413	BUG_ON(!is_first_page(page));
				414
				415	inuse = page->inuse;
				416	max_objects = page->objects;
				417
				418	if (inuse == 0)
				419	fg = ZS_EMPTY;
				420	else if (inuse == max_objects)
				421	fg = ZS_FULL;
				422	else if (inuse <= max_objects / fullness_threshold_frac)
				423	fg = ZS_ALMOST_EMPTY;
				424	else
				425	fg = ZS_ALMOST_FULL;
				426
				427	return fg;
				428	}
				429
Nitin Cupta	c3e3e88	2013-12-11 11:04:37 +0900	[diff] [blame]	430	/*
				431	* Each size class maintains various freelists and zspages are assigned
				432	* to one of these freelists based on the number of live objects they
				433	* have. This functions inserts the given zspage into the freelist
				434	* identified by <class, fullness_group>.
				435	*/
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	436	static void insert_zspage(struct page page, struct size_class class,
				437	enum fullness_group fullness)
				438	{
				439	struct page **head;
				440
				441	BUG_ON(!is_first_page(page));
				442
				443	if (fullness >= _ZS_NR_FULLNESS_GROUPS)
				444	return;
				445
				446	head = &class->fullness_list[fullness];
				447	if (*head)
				448	list_add_tail(&page->lru, &(*head)->lru);
				449
				450	*head = page;
				451	}
				452
Nitin Cupta	c3e3e88	2013-12-11 11:04:37 +0900	[diff] [blame]	453	/*
				454	* This function removes the given zspage from the freelist identified
				455	* by <class, fullness_group>.
				456	*/
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	457	static void remove_zspage(struct page page, struct size_class class,
				458	enum fullness_group fullness)
				459	{
				460	struct page **head;
				461
				462	BUG_ON(!is_first_page(page));
				463
				464	if (fullness >= _ZS_NR_FULLNESS_GROUPS)
				465	return;
				466
				467	head = &class->fullness_list[fullness];
				468	BUG_ON(!*head);
				469	if (list_empty(&(*head)->lru))
				470	*head = NULL;
				471	else if (*head == page)
				472	head = (struct page )list_entry((*head)->lru.next,
				473	struct page, lru);
				474
				475	list_del_init(&page->lru);
				476	}
				477
Nitin Cupta	c3e3e88	2013-12-11 11:04:37 +0900	[diff] [blame]	478	/*
				479	* Each size class maintains zspages in different fullness groups depending
				480	* on the number of live objects they contain. When allocating or freeing
				481	* objects, the fullness status of the page can change, say, from ALMOST_FULL
				482	* to ALMOST_EMPTY when freeing an object. This function checks if such
				483	* a status change has occurred for the given page and accordingly moves the
				484	* page from the freelist of the old fullness group to that of the new
				485	* fullness group.
				486	*/
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	487	static enum fullness_group fix_fullness_group(struct zs_pool *pool,
				488	struct page *page)
				489	{
				490	int class_idx;
				491	struct size_class *class;
				492	enum fullness_group currfg, newfg;
				493
				494	BUG_ON(!is_first_page(page));
				495
				496	get_zspage_mapping(page, &class_idx, &currfg);
				497	newfg = get_fullness_group(page);
				498	if (newfg == currfg)
				499	goto out;
				500
Joonsoo Kim	9eec4cd	2014-12-12 16:56:44 -0800	[diff] [blame]	501	class = pool->size_class[class_idx];
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	502	remove_zspage(page, class, currfg);
				503	insert_zspage(page, class, newfg);
				504	set_zspage_mapping(page, class_idx, newfg);
				505
				506	out:
				507	return newfg;
				508	}
				509
				510	/*
				511	* We have to decide on how many pages to link together
				512	* to form a zspage for each size class. This is important
				513	* to reduce wastage due to unusable space left at end of
				514	* each zspage which is given as:
				515	* wastage = Zp - Zp % size_class
				516	* where Zp = zspage size = k * PAGE_SIZE where k = 1, 2, ...
				517	*
				518	* For example, for size class of 3/8 * PAGE_SIZE, we should
				519	* link together 3 PAGE_SIZE sized pages to form a zspage
				520	* since then we can perfectly fit in 8 such objects.
				521	*/
Minchan Kim	2e3b615	2012-05-03 15:40:39 +0900	[diff] [blame]	522	static int get_pages_per_zspage(int class_size)
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	523	{
				524	int i, max_usedpc = 0;
				525	/* zspage order which gives maximum used size per KB */
				526	int max_usedpc_order = 1;
				527
Seth Jennings	84d4faa	2012-03-05 11:33:21 -0600	[diff] [blame]	528	for (i = 1; i <= ZS_MAX_PAGES_PER_ZSPAGE; i++) {
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	529	int zspage_size;
				530	int waste, usedpc;
				531
				532	zspage_size = i * PAGE_SIZE;
				533	waste = zspage_size % class_size;
				534	usedpc = (zspage_size - waste) * 100 / zspage_size;
				535
				536	if (usedpc > max_usedpc) {
				537	max_usedpc = usedpc;
				538	max_usedpc_order = i;
				539	}
				540	}
				541
				542	return max_usedpc_order;
				543	}
				544
				545	/*
				546	* A single 'zspage' is composed of many system pages which are
				547	* linked together using fields in struct page. This function finds
				548	* the first/head page, given any component page of a zspage.
				549	*/
				550	static struct page get_first_page(struct page page)
				551	{
				552	if (is_first_page(page))
				553	return page;
				554	else
				555	return page->first_page;
				556	}
				557
				558	static struct page get_next_page(struct page page)
				559	{
				560	struct page *next;
				561
				562	if (is_last_page(page))
				563	next = NULL;
				564	else if (is_first_page(page))
Sunghan Suh	e842b97	2013-07-12 16:08:13 +0900	[diff] [blame]	565	next = (struct page *)page_private(page);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	566	else
				567	next = list_entry(page->lru.next, struct page, lru);
				568
				569	return next;
				570	}
				571
Olav Haugan	6729687	2013-11-22 09:30:41 -0800	[diff] [blame]	572	/*
				573	* Encode <page, obj_idx> as a single handle value.
				574	* On hardware platforms with physical memory starting at 0x0 the pfn
				575	* could be 0 so we ensure that the handle will never be 0 by adjusting the
				576	* encoded obj_idx value before encoding.
				577	*/
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	578	static void obj_location_to_handle(struct page page, unsigned long obj_idx)
				579	{
				580	unsigned long handle;
				581
				582	if (!page) {
				583	BUG_ON(obj_idx);
				584	return NULL;
				585	}
				586
				587	handle = page_to_pfn(page) << OBJ_INDEX_BITS;
Olav Haugan	6729687	2013-11-22 09:30:41 -0800	[diff] [blame]	588	handle \|= ((obj_idx + 1) & OBJ_INDEX_MASK);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	589
				590	return (void *)handle;
				591	}
				592
Olav Haugan	6729687	2013-11-22 09:30:41 -0800	[diff] [blame]	593	/*
				594	* Decode <page, obj_idx> pair from the given object handle. We adjust the
				595	* decoded obj_idx back to its original value since it was adjusted in
				596	* obj_location_to_handle().
				597	*/
Minchan Kim	c234434	2012-06-08 15:39:25 +0900	[diff] [blame]	598	static void obj_handle_to_location(unsigned long handle, struct page **page,
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	599	unsigned long *obj_idx)
				600	{
Minchan Kim	c234434	2012-06-08 15:39:25 +0900	[diff] [blame]	601	*page = pfn_to_page(handle >> OBJ_INDEX_BITS);
Olav Haugan	6729687	2013-11-22 09:30:41 -0800	[diff] [blame]	602	*obj_idx = (handle & OBJ_INDEX_MASK) - 1;
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	603	}
				604
				605	static unsigned long obj_idx_to_offset(struct page *page,
				606	unsigned long obj_idx, int class_size)
				607	{
				608	unsigned long off = 0;
				609
				610	if (!is_first_page(page))
				611	off = page->index;
				612
				613	return off + obj_idx * class_size;
				614	}
				615
Nitin Gupta	f4477e9	2012-04-02 09:13:56 -0500	[diff] [blame]	616	static void reset_page(struct page *page)
				617	{
				618	clear_bit(PG_private, &page->flags);
				619	clear_bit(PG_private_2, &page->flags);
				620	set_page_private(page, 0);
				621	page->mapping = NULL;
				622	page->freelist = NULL;
Mel Gorman	22b751c	2013-02-22 16:34:59 -0800	[diff] [blame]	623	page_mapcount_reset(page);
Nitin Gupta	f4477e9	2012-04-02 09:13:56 -0500	[diff] [blame]	624	}
				625
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	626	static void free_zspage(struct page *first_page)
				627	{
Nitin Gupta	f4477e9	2012-04-02 09:13:56 -0500	[diff] [blame]	628	struct page nextp, tmp, *head_extra;
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	629
				630	BUG_ON(!is_first_page(first_page));
				631	BUG_ON(first_page->inuse);
				632
Nitin Gupta	f4477e9	2012-04-02 09:13:56 -0500	[diff] [blame]	633	head_extra = (struct page *)page_private(first_page);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	634
Nitin Gupta	f4477e9	2012-04-02 09:13:56 -0500	[diff] [blame]	635	reset_page(first_page);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	636	__free_page(first_page);
				637
				638	/* zspage with only 1 system page */
Nitin Gupta	f4477e9	2012-04-02 09:13:56 -0500	[diff] [blame]	639	if (!head_extra)
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	640	return;
				641
Nitin Gupta	f4477e9	2012-04-02 09:13:56 -0500	[diff] [blame]	642	list_for_each_entry_safe(nextp, tmp, &head_extra->lru, lru) {
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	643	list_del(&nextp->lru);
Nitin Gupta	f4477e9	2012-04-02 09:13:56 -0500	[diff] [blame]	644	reset_page(nextp);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	645	__free_page(nextp);
				646	}
Nitin Gupta	f4477e9	2012-04-02 09:13:56 -0500	[diff] [blame]	647	reset_page(head_extra);
				648	__free_page(head_extra);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	649	}
				650
				651	/* Initialize a newly allocated zspage */
				652	static void init_zspage(struct page first_page, struct size_class class)
				653	{
				654	unsigned long off = 0;
				655	struct page *page = first_page;
				656
				657	BUG_ON(!is_first_page(first_page));
				658	while (page) {
				659	struct page *next_page;
				660	struct link_free *link;
Dan Streetman	5538c56	2014-10-09 15:30:01 -0700	[diff] [blame]	661	unsigned int i = 1;
Minchan Kim	af4ee5e	2014-12-12 16:56:58 -0800	[diff] [blame]	662	void *vaddr;
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	663
				664	/*
				665	* page->index stores offset of first object starting
				666	* in the page. For the first page, this is always 0,
				667	* so we use first_page->index (aka ->freelist) to store
				668	* head of corresponding zspage's freelist.
				669	*/
				670	if (page != first_page)
				671	page->index = off;
				672
Minchan Kim	af4ee5e	2014-12-12 16:56:58 -0800	[diff] [blame]	673	vaddr = kmap_atomic(page);
				674	link = (struct link_free )vaddr + off / sizeof(link);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	675
Dan Streetman	5538c56	2014-10-09 15:30:01 -0700	[diff] [blame]	676	while ((off += class->size) < PAGE_SIZE) {
				677	link->next = obj_location_to_handle(page, i++);
				678	link += class->size / sizeof(*link);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	679	}
				680
				681	/*
				682	* We now come to the last (full or partial) object on this
				683	* page, which must point to the first object on the next
				684	* page (if present)
				685	*/
				686	next_page = get_next_page(page);
				687	link->next = obj_location_to_handle(next_page, 0);
Minchan Kim	af4ee5e	2014-12-12 16:56:58 -0800	[diff] [blame]	688	kunmap_atomic(vaddr);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	689	page = next_page;
Dan Streetman	5538c56	2014-10-09 15:30:01 -0700	[diff] [blame]	690	off %= PAGE_SIZE;
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	691	}
				692	}
				693
				694	/*
				695	* Allocate a zspage for the given size class
				696	*/
				697	static struct page alloc_zspage(struct size_class class, gfp_t flags)
				698	{
				699	int i, error;
Seth Jennings	b4b700c	2012-06-13 16:03:42 -0500	[diff] [blame]	700	struct page first_page = NULL, uninitialized_var(prev_page);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	701
				702	/*
				703	* Allocate individual pages and link them together as:
				704	* 1. first page->private = first sub-page
				705	* 2. all sub-pages are linked together using page->lru
				706	* 3. each sub-page is linked to the first page using page->first_page
				707	*
				708	* For each size class, First/Head pages are linked together using
				709	* page->lru. Also, we set PG_private to identify the first page
				710	* (i.e. no other sub-page has this flag set) and PG_private_2 to
				711	* identify the last page.
				712	*/
				713	error = -ENOMEM;
Minchan Kim	2e3b615	2012-05-03 15:40:39 +0900	[diff] [blame]	714	for (i = 0; i < class->pages_per_zspage; i++) {
Seth Jennings	b4b700c	2012-06-13 16:03:42 -0500	[diff] [blame]	715	struct page *page;
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	716
				717	page = alloc_page(flags);
				718	if (!page)
				719	goto cleanup;
				720
				721	INIT_LIST_HEAD(&page->lru);
				722	if (i == 0) { /* first page */
Minchan Kim	a27545bf	2012-04-25 15:23:09 +0900	[diff] [blame]	723	SetPagePrivate(page);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	724	set_page_private(page, 0);
				725	first_page = page;
				726	first_page->inuse = 0;
				727	}
				728	if (i == 1)
Sunghan Suh	e842b97	2013-07-12 16:08:13 +0900	[diff] [blame]	729	set_page_private(first_page, (unsigned long)page);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	730	if (i >= 1)
				731	page->first_page = first_page;
				732	if (i >= 2)
				733	list_add(&page->lru, &prev_page->lru);
Minchan Kim	2e3b615	2012-05-03 15:40:39 +0900	[diff] [blame]	734	if (i == class->pages_per_zspage - 1) /* last page */
Minchan Kim	a27545bf	2012-04-25 15:23:09 +0900	[diff] [blame]	735	SetPagePrivate2(page);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	736	prev_page = page;
				737	}
				738
				739	init_zspage(first_page, class);
				740
				741	first_page->freelist = obj_location_to_handle(first_page, 0);
				742	/* Maximum number of objects we can store in this zspage */
Minchan Kim	2e3b615	2012-05-03 15:40:39 +0900	[diff] [blame]	743	first_page->objects = class->pages_per_zspage * PAGE_SIZE / class->size;
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	744
				745	error = 0; /* Success */
				746
				747	cleanup:
				748	if (unlikely(error) && first_page) {
				749	free_zspage(first_page);
				750	first_page = NULL;
				751	}
				752
				753	return first_page;
				754	}
				755
				756	static struct page find_get_zspage(struct size_class class)
				757	{
				758	int i;
				759	struct page *page;
				760
				761	for (i = 0; i < _ZS_NR_FULLNESS_GROUPS; i++) {
				762	page = class->fullness_list[i];
				763	if (page)
				764	break;
				765	}
				766
				767	return page;
				768	}
				769
Minchan Kim	1b945ae	2013-12-11 11:04:36 +0900	[diff] [blame]	770	#ifdef CONFIG_PGTABLE_MAPPING
Seth Jennings	f553646	2012-07-18 11:55:56 -0500	[diff] [blame]	771	static inline int __zs_cpu_up(struct mapping_area *area)
Seth Jennings	5f60190	2012-07-02 16:15:49 -0500	[diff] [blame]	772	{
Seth Jennings	f553646	2012-07-18 11:55:56 -0500	[diff] [blame]	773	/*
				774	* Make sure we don't leak memory if a cpu UP notification
				775	* and zs_init() race and both call zs_cpu_up() on the same cpu
				776	*/
				777	if (area->vm)
				778	return 0;
				779	area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
				780	if (!area->vm)
				781	return -ENOMEM;
				782	return 0;
				783	}
				784
				785	static inline void __zs_cpu_down(struct mapping_area *area)
				786	{
				787	if (area->vm)
				788	free_vm_area(area->vm);
				789	area->vm = NULL;
				790	}
				791
				792	static inline void __zs_map_object(struct mapping_area area,
				793	struct page *pages[2], int off, int size)
				794	{
WANG Chao	f6f8ed4	2014-08-06 16:06:58 -0700	[diff] [blame]	795	BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages));
Seth Jennings	f553646	2012-07-18 11:55:56 -0500	[diff] [blame]	796	area->vm_addr = area->vm->addr;
				797	return area->vm_addr + off;
				798	}
				799
				800	static inline void __zs_unmap_object(struct mapping_area *area,
				801	struct page *pages[2], int off, int size)
				802	{
				803	unsigned long addr = (unsigned long)area->vm_addr;
Seth Jennings	f553646	2012-07-18 11:55:56 -0500	[diff] [blame]	804
Joerg Roedel	d95abbb	2013-03-27 01:43:14 +0100	[diff] [blame]	805	unmap_kernel_range(addr, PAGE_SIZE * 2);
Seth Jennings	f553646	2012-07-18 11:55:56 -0500	[diff] [blame]	806	}
				807
Minchan Kim	1b945ae	2013-12-11 11:04:36 +0900	[diff] [blame]	808	#else /* CONFIG_PGTABLE_MAPPING */
Seth Jennings	f553646	2012-07-18 11:55:56 -0500	[diff] [blame]	809
				810	static inline int __zs_cpu_up(struct mapping_area *area)
				811	{
				812	/*
				813	* Make sure we don't leak memory if a cpu UP notification
				814	* and zs_init() race and both call zs_cpu_up() on the same cpu
				815	*/
				816	if (area->vm_buf)
				817	return 0;
Mahendran Ganesh	40f9fb8	2014-12-12 16:57:01 -0800	[diff] [blame]	818	area->vm_buf = kmalloc(ZS_MAX_ALLOC_SIZE, GFP_KERNEL);
Seth Jennings	f553646	2012-07-18 11:55:56 -0500	[diff] [blame]	819	if (!area->vm_buf)
				820	return -ENOMEM;
				821	return 0;
				822	}
				823
				824	static inline void __zs_cpu_down(struct mapping_area *area)
				825	{
Mahendran Ganesh	40f9fb8	2014-12-12 16:57:01 -0800	[diff] [blame]	826	kfree(area->vm_buf);
Seth Jennings	f553646	2012-07-18 11:55:56 -0500	[diff] [blame]	827	area->vm_buf = NULL;
				828	}
				829
				830	static void __zs_map_object(struct mapping_area area,
				831	struct page *pages[2], int off, int size)
				832	{
Seth Jennings	5f60190	2012-07-02 16:15:49 -0500	[diff] [blame]	833	int sizes[2];
				834	void *addr;
Seth Jennings	f553646	2012-07-18 11:55:56 -0500	[diff] [blame]	835	char *buf = area->vm_buf;
Seth Jennings	5f60190	2012-07-02 16:15:49 -0500	[diff] [blame]	836
Seth Jennings	f553646	2012-07-18 11:55:56 -0500	[diff] [blame]	837	/* disable page faults to match kmap_atomic() return conditions */
				838	pagefault_disable();
				839
				840	/* no read fastpath */
				841	if (area->vm_mm == ZS_MM_WO)
				842	goto out;
Seth Jennings	5f60190	2012-07-02 16:15:49 -0500	[diff] [blame]	843
				844	sizes[0] = PAGE_SIZE - off;
				845	sizes[1] = size - sizes[0];
				846
Seth Jennings	5f60190	2012-07-02 16:15:49 -0500	[diff] [blame]	847	/* copy object to per-cpu buffer */
				848	addr = kmap_atomic(pages[0]);
				849	memcpy(buf, addr + off, sizes[0]);
				850	kunmap_atomic(addr);
				851	addr = kmap_atomic(pages[1]);
				852	memcpy(buf + sizes[0], addr, sizes[1]);
				853	kunmap_atomic(addr);
Seth Jennings	f553646	2012-07-18 11:55:56 -0500	[diff] [blame]	854	out:
				855	return area->vm_buf;
Seth Jennings	5f60190	2012-07-02 16:15:49 -0500	[diff] [blame]	856	}
				857
Seth Jennings	f553646	2012-07-18 11:55:56 -0500	[diff] [blame]	858	static void __zs_unmap_object(struct mapping_area *area,
				859	struct page *pages[2], int off, int size)
Seth Jennings	5f60190	2012-07-02 16:15:49 -0500	[diff] [blame]	860	{
Seth Jennings	5f60190	2012-07-02 16:15:49 -0500	[diff] [blame]	861	int sizes[2];
				862	void *addr;
Seth Jennings	f553646	2012-07-18 11:55:56 -0500	[diff] [blame]	863	char *buf = area->vm_buf;
Seth Jennings	5f60190	2012-07-02 16:15:49 -0500	[diff] [blame]	864
Seth Jennings	f553646	2012-07-18 11:55:56 -0500	[diff] [blame]	865	/* no write fastpath */
				866	if (area->vm_mm == ZS_MM_RO)
				867	goto out;
Seth Jennings	5f60190	2012-07-02 16:15:49 -0500	[diff] [blame]	868
				869	sizes[0] = PAGE_SIZE - off;
				870	sizes[1] = size - sizes[0];
				871
				872	/* copy per-cpu buffer to object */
				873	addr = kmap_atomic(pages[0]);
				874	memcpy(addr + off, buf, sizes[0]);
				875	kunmap_atomic(addr);
				876	addr = kmap_atomic(pages[1]);
				877	memcpy(addr, buf + sizes[0], sizes[1]);
				878	kunmap_atomic(addr);
Seth Jennings	f553646	2012-07-18 11:55:56 -0500	[diff] [blame]	879
				880	out:
				881	/* enable page faults to match kunmap_atomic() return conditions */
				882	pagefault_enable();
Seth Jennings	5f60190	2012-07-02 16:15:49 -0500	[diff] [blame]	883	}
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	884
Minchan Kim	1b945ae	2013-12-11 11:04:36 +0900	[diff] [blame]	885	#endif /* CONFIG_PGTABLE_MAPPING */
Seth Jennings	f553646	2012-07-18 11:55:56 -0500	[diff] [blame]	886
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	887	static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action,
				888	void *pcpu)
				889	{
Seth Jennings	f553646	2012-07-18 11:55:56 -0500	[diff] [blame]	890	int ret, cpu = (long)pcpu;
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	891	struct mapping_area *area;
				892
				893	switch (action) {
				894	case CPU_UP_PREPARE:
				895	area = &per_cpu(zs_map_area, cpu);
Seth Jennings	f553646	2012-07-18 11:55:56 -0500	[diff] [blame]	896	ret = __zs_cpu_up(area);
				897	if (ret)
				898	return notifier_from_errno(ret);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	899	break;
				900	case CPU_DEAD:
				901	case CPU_UP_CANCELED:
				902	area = &per_cpu(zs_map_area, cpu);
Seth Jennings	f553646	2012-07-18 11:55:56 -0500	[diff] [blame]	903	__zs_cpu_down(area);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	904	break;
				905	}
				906
				907	return NOTIFY_OK;
				908	}
				909
				910	static struct notifier_block zs_cpu_nb = {
				911	.notifier_call = zs_cpu_notifier
				912	};
				913
Sergey Senozhatsky	b1b00a5	2014-12-12 16:56:56 -0800	[diff] [blame]	914	static int zs_register_cpu_notifier(void)
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	915	{
Sergey Senozhatsky	b1b00a5	2014-12-12 16:56:56 -0800	[diff] [blame]	916	int cpu, uninitialized_var(ret);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	917
Srivatsa S. Bhat	f0e71fc	2014-03-11 02:09:59 +0530	[diff] [blame]	918	cpu_notifier_register_begin();
				919
				920	__register_cpu_notifier(&zs_cpu_nb);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	921	for_each_online_cpu(cpu) {
				922	ret = zs_cpu_notifier(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
Sergey Senozhatsky	b1b00a5	2014-12-12 16:56:56 -0800	[diff] [blame]	923	if (notifier_to_errno(ret))
				924	break;
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	925	}
Srivatsa S. Bhat	f0e71fc	2014-03-11 02:09:59 +0530	[diff] [blame]	926
				927	cpu_notifier_register_done();
Sergey Senozhatsky	b1b00a5	2014-12-12 16:56:56 -0800	[diff] [blame]	928	return notifier_to_errno(ret);
				929	}
				930
Ganesh Mahendran	66cdef6	2014-12-18 16:17:40 -0800	[diff] [blame]	931	static void zs_unregister_cpu_notifier(void)
				932	{
				933	int cpu;
				934
				935	cpu_notifier_register_begin();
				936
				937	for_each_online_cpu(cpu)
				938	zs_cpu_notifier(NULL, CPU_DEAD, (void *)(long)cpu);
				939	__unregister_cpu_notifier(&zs_cpu_nb);
				940
				941	cpu_notifier_register_done();
				942	}
				943
Mahendran Ganesh	40f9fb8	2014-12-12 16:57:01 -0800	[diff] [blame]	944	static void init_zs_size_classes(void)
				945	{
				946	int nr;
				947
				948	nr = (ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / ZS_SIZE_CLASS_DELTA + 1;
				949	if ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) % ZS_SIZE_CLASS_DELTA)
				950	nr += 1;
				951
				952	zs_size_classes = nr;
				953	}
				954
Joonsoo Kim	9eec4cd	2014-12-12 16:56:44 -0800	[diff] [blame]	955	static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage)
				956	{
				957	return pages_per_zspage * PAGE_SIZE / size;
				958	}
				959
				960	static bool can_merge(struct size_class *prev, int size, int pages_per_zspage)
				961	{
				962	if (prev->pages_per_zspage != pages_per_zspage)
				963	return false;
				964
				965	if (get_maxobj_per_zspage(prev->size, prev->pages_per_zspage)
				966	!= get_maxobj_per_zspage(size, pages_per_zspage))
				967	return false;
				968
				969	return true;
				970	}
				971
Ganesh Mahendran	0f050d9	2015-02-12 15:00:54 -0800	[diff] [blame]	972	#ifdef CONFIG_ZSMALLOC_STAT
				973
				974	static inline void zs_stat_inc(struct size_class *class,
				975	enum zs_stat_type type, unsigned long cnt)
				976	{
				977	class->stats.objs[type] += cnt;
				978	}
				979
				980	static inline void zs_stat_dec(struct size_class *class,
				981	enum zs_stat_type type, unsigned long cnt)
				982	{
				983	class->stats.objs[type] -= cnt;
				984	}
				985
				986	static inline unsigned long zs_stat_get(struct size_class *class,
				987	enum zs_stat_type type)
				988	{
				989	return class->stats.objs[type];
				990	}
				991
				992	static int __init zs_stat_init(void)
				993	{
				994	if (!debugfs_initialized())
				995	return -ENODEV;
				996
				997	zs_stat_root = debugfs_create_dir("zsmalloc", NULL);
				998	if (!zs_stat_root)
				999	return -ENOMEM;
				1000
				1001	return 0;
				1002	}
				1003
				1004	static void __exit zs_stat_exit(void)
				1005	{
				1006	debugfs_remove_recursive(zs_stat_root);
				1007	}
				1008
				1009	static int zs_stats_size_show(struct seq_file s, void v)
				1010	{
				1011	int i;
				1012	struct zs_pool *pool = s->private;
				1013	struct size_class *class;
				1014	int objs_per_zspage;
				1015	unsigned long obj_allocated, obj_used, pages_used;
				1016	unsigned long total_objs = 0, total_used_objs = 0, total_pages = 0;
				1017
				1018	seq_printf(s, " %5s %5s %13s %10s %10s\n", "class", "size",
				1019	"obj_allocated", "obj_used", "pages_used");
				1020
				1021	for (i = 0; i < zs_size_classes; i++) {
				1022	class = pool->size_class[i];
				1023
				1024	if (class->index != i)
				1025	continue;
				1026
				1027	spin_lock(&class->lock);
				1028	obj_allocated = zs_stat_get(class, OBJ_ALLOCATED);
				1029	obj_used = zs_stat_get(class, OBJ_USED);
				1030	spin_unlock(&class->lock);
				1031
				1032	objs_per_zspage = get_maxobj_per_zspage(class->size,
				1033	class->pages_per_zspage);
				1034	pages_used = obj_allocated / objs_per_zspage *
				1035	class->pages_per_zspage;
				1036
				1037	seq_printf(s, " %5u %5u %10lu %10lu %10lu\n", i,
				1038	class->size, obj_allocated, obj_used, pages_used);
				1039
				1040	total_objs += obj_allocated;
				1041	total_used_objs += obj_used;
				1042	total_pages += pages_used;
				1043	}
				1044
				1045	seq_puts(s, "\n");
				1046	seq_printf(s, " %5s %5s %10lu %10lu %10lu\n", "Total", "",
				1047	total_objs, total_used_objs, total_pages);
				1048
				1049	return 0;
				1050	}
				1051
				1052	static int zs_stats_size_open(struct inode inode, struct file file)
				1053	{
				1054	return single_open(file, zs_stats_size_show, inode->i_private);
				1055	}
				1056
				1057	static const struct file_operations zs_stat_size_ops = {
				1058	.open = zs_stats_size_open,
				1059	.read = seq_read,
				1060	.llseek = seq_lseek,
				1061	.release = single_release,
				1062	};
				1063
				1064	static int zs_pool_stat_create(char name, struct zs_pool pool)
				1065	{
				1066	struct dentry *entry;
				1067
				1068	if (!zs_stat_root)
				1069	return -ENODEV;
				1070
				1071	entry = debugfs_create_dir(name, zs_stat_root);
				1072	if (!entry) {
				1073	pr_warn("debugfs dir <%s> creation failed\n", name);
				1074	return -ENOMEM;
				1075	}
				1076	pool->stat_dentry = entry;
				1077
				1078	entry = debugfs_create_file("obj_in_classes", S_IFREG \| S_IRUGO,
				1079	pool->stat_dentry, pool, &zs_stat_size_ops);
				1080	if (!entry) {
				1081	pr_warn("%s: debugfs file entry <%s> creation failed\n",
				1082	name, "obj_in_classes");
				1083	return -ENOMEM;
				1084	}
				1085
				1086	return 0;
				1087	}
				1088
				1089	static void zs_pool_stat_destroy(struct zs_pool *pool)
				1090	{
				1091	debugfs_remove_recursive(pool->stat_dentry);
				1092	}
				1093
				1094	#else /* CONFIG_ZSMALLOC_STAT */
				1095
				1096	static inline void zs_stat_inc(struct size_class *class,
				1097	enum zs_stat_type type, unsigned long cnt)
				1098	{
				1099	}
				1100
				1101	static inline void zs_stat_dec(struct size_class *class,
				1102	enum zs_stat_type type, unsigned long cnt)
				1103	{
				1104	}
				1105
				1106	static inline unsigned long zs_stat_get(struct size_class *class,
				1107	enum zs_stat_type type)
				1108	{
				1109	return 0;
				1110	}
				1111
				1112	static int __init zs_stat_init(void)
				1113	{
				1114	return 0;
				1115	}
				1116
				1117	static void __exit zs_stat_exit(void)
				1118	{
				1119	}
				1120
				1121	static inline int zs_pool_stat_create(char name, struct zs_pool pool)
				1122	{
				1123	return 0;
				1124	}
				1125
				1126	static inline void zs_pool_stat_destroy(struct zs_pool *pool)
				1127	{
				1128	}
				1129
				1130	#endif
				1131
Ganesh Mahendran	66cdef6	2014-12-18 16:17:40 -0800	[diff] [blame]	1132	unsigned long zs_get_total_pages(struct zs_pool *pool)
				1133	{
				1134	return atomic_long_read(&pool->pages_allocated);
				1135	}
				1136	EXPORT_SYMBOL_GPL(zs_get_total_pages);
				1137
				1138	/**
				1139	* zs_map_object - get address of allocated object from handle.
				1140	* @pool: pool from which the object was allocated
				1141	* @handle: handle returned from zs_malloc
				1142	*
				1143	* Before using an object allocated from zs_malloc, it must be mapped using
				1144	* this function. When done with the object, it must be unmapped using
				1145	* zs_unmap_object.
				1146	*
				1147	* Only one object can be mapped per cpu at a time. There is no protection
				1148	* against nested mappings.
				1149	*
				1150	* This function returns with preemption and page faults disabled.
				1151	*/
				1152	void zs_map_object(struct zs_pool pool, unsigned long handle,
				1153	enum zs_mapmode mm)
				1154	{
				1155	struct page *page;
				1156	unsigned long obj_idx, off;
				1157
				1158	unsigned int class_idx;
				1159	enum fullness_group fg;
				1160	struct size_class *class;
				1161	struct mapping_area *area;
				1162	struct page *pages[2];
				1163
				1164	BUG_ON(!handle);
				1165
				1166	/*
				1167	* Because we use per-cpu mapping areas shared among the
				1168	* pools/users, we can't allow mapping in interrupt context
				1169	* because it can corrupt another users mappings.
				1170	*/
				1171	BUG_ON(in_interrupt());
				1172
				1173	obj_handle_to_location(handle, &page, &obj_idx);
				1174	get_zspage_mapping(get_first_page(page), &class_idx, &fg);
				1175	class = pool->size_class[class_idx];
				1176	off = obj_idx_to_offset(page, obj_idx, class->size);
				1177
				1178	area = &get_cpu_var(zs_map_area);
				1179	area->vm_mm = mm;
				1180	if (off + class->size <= PAGE_SIZE) {
				1181	/* this object is contained entirely within a page */
				1182	area->vm_addr = kmap_atomic(page);
				1183	return area->vm_addr + off;
				1184	}
				1185
				1186	/* this object spans two pages */
				1187	pages[0] = page;
				1188	pages[1] = get_next_page(page);
				1189	BUG_ON(!pages[1]);
				1190
				1191	return __zs_map_object(area, pages, off, class->size);
				1192	}
				1193	EXPORT_SYMBOL_GPL(zs_map_object);
				1194
				1195	void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
				1196	{
				1197	struct page *page;
				1198	unsigned long obj_idx, off;
				1199
				1200	unsigned int class_idx;
				1201	enum fullness_group fg;
				1202	struct size_class *class;
				1203	struct mapping_area *area;
				1204
				1205	BUG_ON(!handle);
				1206
				1207	obj_handle_to_location(handle, &page, &obj_idx);
				1208	get_zspage_mapping(get_first_page(page), &class_idx, &fg);
				1209	class = pool->size_class[class_idx];
				1210	off = obj_idx_to_offset(page, obj_idx, class->size);
				1211
				1212	area = this_cpu_ptr(&zs_map_area);
				1213	if (off + class->size <= PAGE_SIZE)
				1214	kunmap_atomic(area->vm_addr);
				1215	else {
				1216	struct page *pages[2];
				1217
				1218	pages[0] = page;
				1219	pages[1] = get_next_page(page);
				1220	BUG_ON(!pages[1]);
				1221
				1222	__zs_unmap_object(area, pages, off, class->size);
				1223	}
				1224	put_cpu_var(zs_map_area);
				1225	}
				1226	EXPORT_SYMBOL_GPL(zs_unmap_object);
				1227
				1228	/**
				1229	* zs_malloc - Allocate block of given size from pool.
				1230	* @pool: pool to allocate from
				1231	* @size: size of block to allocate
				1232	*
				1233	* On success, handle to the allocated object is returned,
				1234	* otherwise 0.
				1235	* Allocation requests with size > ZS_MAX_ALLOC_SIZE will fail.
				1236	*/
				1237	unsigned long zs_malloc(struct zs_pool *pool, size_t size)
				1238	{
				1239	unsigned long obj;
				1240	struct link_free *link;
				1241	struct size_class *class;
				1242	void *vaddr;
				1243
				1244	struct page first_page, m_page;
				1245	unsigned long m_objidx, m_offset;
				1246
				1247	if (unlikely(!size \|\| size > ZS_MAX_ALLOC_SIZE))
				1248	return 0;
				1249
				1250	class = pool->size_class[get_size_class_index(size)];
				1251
				1252	spin_lock(&class->lock);
				1253	first_page = find_get_zspage(class);
				1254
				1255	if (!first_page) {
				1256	spin_unlock(&class->lock);
				1257	first_page = alloc_zspage(class, pool->flags);
				1258	if (unlikely(!first_page))
				1259	return 0;
				1260
				1261	set_zspage_mapping(first_page, class->index, ZS_EMPTY);
				1262	atomic_long_add(class->pages_per_zspage,
				1263	&pool->pages_allocated);
Ganesh Mahendran	0f050d9	2015-02-12 15:00:54 -0800	[diff] [blame]	1264
Ganesh Mahendran	66cdef6	2014-12-18 16:17:40 -0800	[diff] [blame]	1265	spin_lock(&class->lock);
Ganesh Mahendran	0f050d9	2015-02-12 15:00:54 -0800	[diff] [blame]	1266	zs_stat_inc(class, OBJ_ALLOCATED, get_maxobj_per_zspage(
				1267	class->size, class->pages_per_zspage));
Ganesh Mahendran	66cdef6	2014-12-18 16:17:40 -0800	[diff] [blame]	1268	}
				1269
				1270	obj = (unsigned long)first_page->freelist;
				1271	obj_handle_to_location(obj, &m_page, &m_objidx);
				1272	m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);
				1273
				1274	vaddr = kmap_atomic(m_page);
				1275	link = (struct link_free )vaddr + m_offset / sizeof(link);
				1276	first_page->freelist = link->next;
				1277	memset(link, POISON_INUSE, sizeof(*link));
				1278	kunmap_atomic(vaddr);
				1279
				1280	first_page->inuse++;
Ganesh Mahendran	0f050d9	2015-02-12 15:00:54 -0800	[diff] [blame]	1281	zs_stat_inc(class, OBJ_USED, 1);
Ganesh Mahendran	66cdef6	2014-12-18 16:17:40 -0800	[diff] [blame]	1282	/* Now move the zspage to another fullness group, if required */
				1283	fix_fullness_group(pool, first_page);
				1284	spin_unlock(&class->lock);
				1285
				1286	return obj;
				1287	}
				1288	EXPORT_SYMBOL_GPL(zs_malloc);
				1289
				1290	void zs_free(struct zs_pool *pool, unsigned long obj)
				1291	{
				1292	struct link_free *link;
				1293	struct page first_page, f_page;
				1294	unsigned long f_objidx, f_offset;
				1295	void *vaddr;
				1296
				1297	int class_idx;
				1298	struct size_class *class;
				1299	enum fullness_group fullness;
				1300
				1301	if (unlikely(!obj))
				1302	return;
				1303
				1304	obj_handle_to_location(obj, &f_page, &f_objidx);
				1305	first_page = get_first_page(f_page);
				1306
				1307	get_zspage_mapping(first_page, &class_idx, &fullness);
				1308	class = pool->size_class[class_idx];
				1309	f_offset = obj_idx_to_offset(f_page, f_objidx, class->size);
				1310
				1311	spin_lock(&class->lock);
				1312
				1313	/* Insert this object in containing zspage's freelist */
				1314	vaddr = kmap_atomic(f_page);
				1315	link = (struct link_free *)(vaddr + f_offset);
				1316	link->next = first_page->freelist;
				1317	kunmap_atomic(vaddr);
				1318	first_page->freelist = (void *)obj;
				1319
				1320	first_page->inuse--;
				1321	fullness = fix_fullness_group(pool, first_page);
Ganesh Mahendran	0f050d9	2015-02-12 15:00:54 -0800	[diff] [blame]	1322
				1323	zs_stat_dec(class, OBJ_USED, 1);
				1324	if (fullness == ZS_EMPTY)
				1325	zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage(
				1326	class->size, class->pages_per_zspage));
				1327
Ganesh Mahendran	66cdef6	2014-12-18 16:17:40 -0800	[diff] [blame]	1328	spin_unlock(&class->lock);
				1329
				1330	if (fullness == ZS_EMPTY) {
				1331	atomic_long_sub(class->pages_per_zspage,
				1332	&pool->pages_allocated);
				1333	free_zspage(first_page);
				1334	}
				1335	}
				1336	EXPORT_SYMBOL_GPL(zs_free);
				1337
Davidlohr Bueso	4bbc0bc	2013-01-04 12:14:00 -0800	[diff] [blame]	1338	/**
				1339	* zs_create_pool - Creates an allocation pool to work from.
Seth Jennings	0d145a5	2013-01-30 09:36:52 -0600	[diff] [blame]	1340	* @flags: allocation flags used to allocate pool metadata
Davidlohr Bueso	4bbc0bc	2013-01-04 12:14:00 -0800	[diff] [blame]	1341	*
				1342	* This function must be called before anything when using
				1343	* the zsmalloc allocator.
				1344	*
				1345	* On success, a pointer to the newly created pool is returned,
				1346	* otherwise NULL.
				1347	*/
Ganesh Mahendran	3eba0c6	2015-02-12 15:00:51 -0800	[diff] [blame]	1348	struct zs_pool zs_create_pool(char name, gfp_t flags)
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1349	{
Ganesh Mahendran	1813665	2014-12-12 16:57:10 -0800	[diff] [blame]	1350	int i;
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1351	struct zs_pool *pool;
Ganesh Mahendran	df8b5bb	2014-12-12 16:57:07 -0800	[diff] [blame]	1352	struct size_class *prev_class = NULL;
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1353
Ganesh Mahendran	1813665	2014-12-12 16:57:10 -0800	[diff] [blame]	1354	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1355	if (!pool)
				1356	return NULL;
				1357
Ganesh Mahendran	0f050d9	2015-02-12 15:00:54 -0800	[diff] [blame]	1358	pool->name = kstrdup(name, GFP_KERNEL);
				1359	if (!pool->name) {
				1360	kfree(pool);
				1361	return NULL;
				1362	}
				1363
Mahendran Ganesh	40f9fb8	2014-12-12 16:57:01 -0800	[diff] [blame]	1364	pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *),
				1365	GFP_KERNEL);
				1366	if (!pool->size_class) {
Ganesh Mahendran	0f050d9	2015-02-12 15:00:54 -0800	[diff] [blame]	1367	kfree(pool->name);
Mahendran Ganesh	40f9fb8	2014-12-12 16:57:01 -0800	[diff] [blame]	1368	kfree(pool);
				1369	return NULL;
				1370	}
				1371
Joonsoo Kim	9eec4cd	2014-12-12 16:56:44 -0800	[diff] [blame]	1372	/*
				1373	* Iterate reversly, because, size of size_class that we want to use
				1374	* for merging should be larger or equal to current size.
				1375	*/
Mahendran Ganesh	40f9fb8	2014-12-12 16:57:01 -0800	[diff] [blame]	1376	for (i = zs_size_classes - 1; i >= 0; i--) {
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1377	int size;
Joonsoo Kim	9eec4cd	2014-12-12 16:56:44 -0800	[diff] [blame]	1378	int pages_per_zspage;
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1379	struct size_class *class;
				1380
				1381	size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA;
				1382	if (size > ZS_MAX_ALLOC_SIZE)
				1383	size = ZS_MAX_ALLOC_SIZE;
Joonsoo Kim	9eec4cd	2014-12-12 16:56:44 -0800	[diff] [blame]	1384	pages_per_zspage = get_pages_per_zspage(size);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1385
Joonsoo Kim	9eec4cd	2014-12-12 16:56:44 -0800	[diff] [blame]	1386	/*
				1387	* size_class is used for normal zsmalloc operation such
				1388	* as alloc/free for that size. Although it is natural that we
				1389	* have one size_class for each size, there is a chance that we
				1390	* can get more memory utilization if we use one size_class for
				1391	* many different sizes whose size_class have same
				1392	* characteristics. So, we makes size_class point to
				1393	* previous size_class if possible.
				1394	*/
Ganesh Mahendran	df8b5bb	2014-12-12 16:57:07 -0800	[diff] [blame]	1395	if (prev_class) {
Joonsoo Kim	9eec4cd	2014-12-12 16:56:44 -0800	[diff] [blame]	1396	if (can_merge(prev_class, size, pages_per_zspage)) {
				1397	pool->size_class[i] = prev_class;
				1398	continue;
				1399	}
				1400	}
				1401
				1402	class = kzalloc(sizeof(struct size_class), GFP_KERNEL);
				1403	if (!class)
				1404	goto err;
				1405
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1406	class->size = size;
				1407	class->index = i;
Joonsoo Kim	9eec4cd	2014-12-12 16:56:44 -0800	[diff] [blame]	1408	class->pages_per_zspage = pages_per_zspage;
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1409	spin_lock_init(&class->lock);
Joonsoo Kim	9eec4cd	2014-12-12 16:56:44 -0800	[diff] [blame]	1410	pool->size_class[i] = class;
Ganesh Mahendran	df8b5bb	2014-12-12 16:57:07 -0800	[diff] [blame]	1411
				1412	prev_class = class;
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1413	}
				1414
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1415	pool->flags = flags;
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1416
Ganesh Mahendran	0f050d9	2015-02-12 15:00:54 -0800	[diff] [blame]	1417	if (zs_pool_stat_create(name, pool))
				1418	goto err;
				1419
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1420	return pool;
Joonsoo Kim	9eec4cd	2014-12-12 16:56:44 -0800	[diff] [blame]	1421
				1422	err:
				1423	zs_destroy_pool(pool);
				1424	return NULL;
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1425	}
				1426	EXPORT_SYMBOL_GPL(zs_create_pool);
				1427
				1428	void zs_destroy_pool(struct zs_pool *pool)
				1429	{
				1430	int i;
				1431
Ganesh Mahendran	0f050d9	2015-02-12 15:00:54 -0800	[diff] [blame]	1432	zs_pool_stat_destroy(pool);
				1433
Mahendran Ganesh	40f9fb8	2014-12-12 16:57:01 -0800	[diff] [blame]	1434	for (i = 0; i < zs_size_classes; i++) {
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1435	int fg;
Joonsoo Kim	9eec4cd	2014-12-12 16:56:44 -0800	[diff] [blame]	1436	struct size_class *class = pool->size_class[i];
				1437
				1438	if (!class)
				1439	continue;
				1440
				1441	if (class->index != i)
				1442	continue;
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1443
				1444	for (fg = 0; fg < _ZS_NR_FULLNESS_GROUPS; fg++) {
				1445	if (class->fullness_list[fg]) {
Marlies Ruck	93ad5ab	2013-05-15 16:56:49 -0400	[diff] [blame]	1446	pr_info("Freeing non-empty class with size %db, fullness group %d\n",
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1447	class->size, fg);
				1448	}
				1449	}
Joonsoo Kim	9eec4cd	2014-12-12 16:56:44 -0800	[diff] [blame]	1450	kfree(class);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1451	}
Mahendran Ganesh	40f9fb8	2014-12-12 16:57:01 -0800	[diff] [blame]	1452
				1453	kfree(pool->size_class);
Ganesh Mahendran	0f050d9	2015-02-12 15:00:54 -0800	[diff] [blame]	1454	kfree(pool->name);
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1455	kfree(pool);
				1456	}
				1457	EXPORT_SYMBOL_GPL(zs_destroy_pool);
				1458
Ganesh Mahendran	66cdef6	2014-12-18 16:17:40 -0800	[diff] [blame]	1459	static int __init zs_init(void)
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1460	{
Ganesh Mahendran	66cdef6	2014-12-18 16:17:40 -0800	[diff] [blame]	1461	int ret = zs_register_cpu_notifier();
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1462
Ganesh Mahendran	0f050d9	2015-02-12 15:00:54 -0800	[diff] [blame]	1463	if (ret)
				1464	goto notifier_fail;
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1465
Ganesh Mahendran	66cdef6	2014-12-18 16:17:40 -0800	[diff] [blame]	1466	init_zs_size_classes();
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1467
Ganesh Mahendran	66cdef6	2014-12-18 16:17:40 -0800	[diff] [blame]	1468	#ifdef CONFIG_ZPOOL
				1469	zpool_register_driver(&zs_zpool_driver);
				1470	#endif
Ganesh Mahendran	0f050d9	2015-02-12 15:00:54 -0800	[diff] [blame]	1471
				1472	ret = zs_stat_init();
				1473	if (ret) {
				1474	pr_err("zs stat initialization failed\n");
				1475	goto stat_fail;
				1476	}
Ganesh Mahendran	66cdef6	2014-12-18 16:17:40 -0800	[diff] [blame]	1477	return 0;
Ganesh Mahendran	0f050d9	2015-02-12 15:00:54 -0800	[diff] [blame]	1478
				1479	stat_fail:
				1480	#ifdef CONFIG_ZPOOL
				1481	zpool_unregister_driver(&zs_zpool_driver);
				1482	#endif
				1483	notifier_fail:
				1484	zs_unregister_cpu_notifier();
				1485
				1486	return ret;
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1487	}
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1488
Ganesh Mahendran	66cdef6	2014-12-18 16:17:40 -0800	[diff] [blame]	1489	static void __exit zs_exit(void)
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1490	{
Ganesh Mahendran	66cdef6	2014-12-18 16:17:40 -0800	[diff] [blame]	1491	#ifdef CONFIG_ZPOOL
				1492	zpool_unregister_driver(&zs_zpool_driver);
				1493	#endif
				1494	zs_unregister_cpu_notifier();
Ganesh Mahendran	0f050d9	2015-02-12 15:00:54 -0800	[diff] [blame]	1495
				1496	zs_stat_exit();
Nitin Gupta	61989a8	2012-01-09 16:51:56 -0600	[diff] [blame]	1497	}
Ben Hutchings	069f101	2012-06-20 02:31:11 +0100	[diff] [blame]	1498
				1499	module_init(zs_init);
				1500	module_exit(zs_exit);
				1501
				1502	MODULE_LICENSE("Dual BSD/GPL");
				1503	MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");