/*
 * zcache.c
 *
 * Copyright (c) 2010,2011, Dan Magenheimer, Oracle Corp.
 * Copyright (c) 2010,2011, Nitin Gupta
 *
 * Zcache provides an in-kernel "host implementation" for transcendent memory
 * and, thus indirectly, for cleancache and frontswap.  Zcache includes two
 * page-accessible memory [1] interfaces, both utilizing lzo1x compression:
 * 1) "compression buddies" ("zbud") is used for ephemeral pages
 * 2) xvmalloc is used for persistent pages.
 * Xvmalloc (based on the TLSF allocator) has very low fragmentation
 * and so maximizes space efficiency, while zbud allows pairs (and potentially,
 * in the future, more than a pair of) compressed pages to be closely linked
 * so that reclaiming can be done via the kernel's physical-page-oriented
 * "shrinker" interface.
 *
 * [1] For a definition of page-accessible memory (aka PAM), see:
 *     http://marc.info/?l=linux-mm&m=127811271605009
 */

#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/list.h>
#include <linux/lzo.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/math64.h>
#include "tmem.h"

#include "../zram/xvmalloc.h" /* if built in drivers/staging */

#if (!defined(CONFIG_CLEANCACHE) && !defined(CONFIG_FRONTSWAP))
#error "zcache is useless without CONFIG_CLEANCACHE or CONFIG_FRONTSWAP"
#endif
#ifdef CONFIG_CLEANCACHE
#include <linux/cleancache.h>
#endif
#ifdef CONFIG_FRONTSWAP
#include <linux/frontswap.h>
#endif

#if 0
/* this is more aggressive but may cause other problems? */
#define ZCACHE_GFP_MASK	(GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN)
#else
#define ZCACHE_GFP_MASK \
	(__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC)
#endif

#define MAX_POOLS_PER_CLIENT 16

#define MAX_CLIENTS 16
#define LOCAL_CLIENT ((uint16_t)-1)

MODULE_LICENSE("GPL");

struct zcache_client {
	struct tmem_pool *tmem_pools[MAX_POOLS_PER_CLIENT];
	struct xv_pool *xvpool;
	bool allocated;
	atomic_t refcount;
};

static struct zcache_client zcache_host;
static struct zcache_client zcache_clients[MAX_CLIENTS];

static inline uint16_t get_client_id_from_client(struct zcache_client *cli)
{
	BUG_ON(cli == NULL);
	if (cli == &zcache_host)
		return LOCAL_CLIENT;
	return cli - &zcache_clients[0];
}

static inline bool is_local_client(struct zcache_client *cli)
{
	return cli == &zcache_host;
}

/**********
 * Compression buddies ("zbud") provides for packing two (or, possibly
 * in the future, more) compressed ephemeral pages into a single "raw"
 * (physical) page and tracking them with data structures so that
 * the raw pages can be easily reclaimed.
 *
 * A zbud page ("zbpg") is an aligned page containing a list_head,
 * a lock, and two "zbud headers".  The remainder of the physical
 * page is divided up into aligned 64-byte "chunks" which contain
 * the compressed data for zero, one, or two zbuds.  Each zbpg
 * resides on: (1) an "unused list" if it has no zbuds; (2) a
 * "buddied" list if it is fully populated with two zbuds; or
 * (3) one of PAGE_SIZE/64 "unbuddied" lists indexed by how many chunks
 * the one unbuddied zbud uses.  The data inside a zbpg cannot be
 * read or written unless the zbpg's lock is held.
 */

#define ZBH_SENTINEL  0x43214321
#define ZBPG_SENTINEL  0xdeadbeef

#define ZBUD_MAX_BUDS 2

struct zbud_hdr {
	uint16_t client_id;
	uint16_t pool_id;
	struct tmem_oid oid;
	uint32_t index;
	uint16_t size; /* compressed size in bytes, zero means unused */
	DECL_SENTINEL
};

struct zbud_page {
	struct list_head bud_list;
	spinlock_t lock;
	struct zbud_hdr buddy[ZBUD_MAX_BUDS];
	DECL_SENTINEL
	/* followed by NUM_CHUNK aligned CHUNK_SIZE-byte chunks */
};

#define CHUNK_SHIFT	6
#define CHUNK_SIZE	(1 << CHUNK_SHIFT)
#define CHUNK_MASK	(~(CHUNK_SIZE-1))
#define NCHUNKS		(((PAGE_SIZE - sizeof(struct zbud_page)) & \
				CHUNK_MASK) >> CHUNK_SHIFT)
#define MAX_CHUNK	(NCHUNKS-1)
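
/*
 * Worked example (illustrative only): assuming PAGE_SIZE == 4096 and a
 * hypothetical sizeof(struct zbud_page) of 104 bytes, NCHUNKS evaluates to
 * ((4096 - 104) & ~63) >> 6 = 3968 >> 6 = 62, so a zbpg can hold up to 62
 * aligned 64-byte chunks of compressed data and MAX_CHUNK is 61.  A zbud
 * compressed to 1000 bytes would occupy (1000 + 63) >> 6 = 16 chunks.
 */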

static struct {
	struct list_head list;
	unsigned count;
} zbud_unbuddied[NCHUNKS];
/* list N contains pages with N chunks USED and NCHUNKS-N unused */
/* element 0 is never used but optimizing that isn't worth it */
static unsigned long zbud_cumul_chunk_counts[NCHUNKS];

struct list_head zbud_buddied_list;
static unsigned long zcache_zbud_buddied_count;

/* protects the buddied list and all unbuddied lists */
static DEFINE_SPINLOCK(zbud_budlists_spinlock);

static LIST_HEAD(zbpg_unused_list);
static unsigned long zcache_zbpg_unused_list_count;

/* protects the unused page list */
static DEFINE_SPINLOCK(zbpg_unused_list_spinlock);

static atomic_t zcache_zbud_curr_raw_pages;
static atomic_t zcache_zbud_curr_zpages;
static unsigned long zcache_zbud_curr_zbytes;
static unsigned long zcache_zbud_cumul_zpages;
static unsigned long zcache_zbud_cumul_zbytes;
static unsigned long zcache_compress_poor;
static unsigned long zcache_mean_compress_poor;

/* forward references */
static void *zcache_get_free_page(void);
static void zcache_free_page(void *p);

/*
 * zbud helper functions
 */

static inline unsigned zbud_max_buddy_size(void)
{
	return MAX_CHUNK << CHUNK_SHIFT;
}

static inline unsigned zbud_size_to_chunks(unsigned size)
{
	BUG_ON(size == 0 || size > zbud_max_buddy_size());
	return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
}

static inline int zbud_budnum(struct zbud_hdr *zh)
{
	unsigned offset = (unsigned long)zh & (PAGE_SIZE - 1);
	struct zbud_page *zbpg = NULL;
	unsigned budnum = -1U;
	int i;

	for (i = 0; i < ZBUD_MAX_BUDS; i++)
		if (offset == offsetof(typeof(*zbpg), buddy[i])) {
			budnum = i;
			break;
		}
	BUG_ON(budnum == -1U);
	return budnum;
}

static char *zbud_data(struct zbud_hdr *zh, unsigned size)
{
	struct zbud_page *zbpg;
	char *p;
	unsigned budnum;

	ASSERT_SENTINEL(zh, ZBH);
	budnum = zbud_budnum(zh);
	BUG_ON(size == 0 || size > zbud_max_buddy_size());
	zbpg = container_of(zh, struct zbud_page, buddy[budnum]);
	ASSERT_SPINLOCK(&zbpg->lock);
	p = (char *)zbpg;
	if (budnum == 0)
		p += ((sizeof(struct zbud_page) + CHUNK_SIZE - 1) &
							CHUNK_MASK);
	else if (budnum == 1)
		p += PAGE_SIZE - ((size + CHUNK_SIZE - 1) & CHUNK_MASK);
	return p;
}

/*
 * zbud raw page management
 */

static struct zbud_page *zbud_alloc_raw_page(void)
{
	struct zbud_page *zbpg = NULL;
	struct zbud_hdr *zh0, *zh1;
	bool recycled = false;

	/* if any pages on the zbpg list, use one */
	spin_lock(&zbpg_unused_list_spinlock);
	if (!list_empty(&zbpg_unused_list)) {
		zbpg = list_first_entry(&zbpg_unused_list,
				struct zbud_page, bud_list);
		list_del_init(&zbpg->bud_list);
		zcache_zbpg_unused_list_count--;
		recycled = true;
	}
	spin_unlock(&zbpg_unused_list_spinlock);
	if (zbpg == NULL)
		/* none on zbpg list, try to get a kernel page */
		zbpg = zcache_get_free_page();
	if (likely(zbpg != NULL)) {
		INIT_LIST_HEAD(&zbpg->bud_list);
		zh0 = &zbpg->buddy[0]; zh1 = &zbpg->buddy[1];
		spin_lock_init(&zbpg->lock);
		if (recycled) {
			ASSERT_INVERTED_SENTINEL(zbpg, ZBPG);
			SET_SENTINEL(zbpg, ZBPG);
			BUG_ON(zh0->size != 0 || tmem_oid_valid(&zh0->oid));
			BUG_ON(zh1->size != 0 || tmem_oid_valid(&zh1->oid));
		} else {
			atomic_inc(&zcache_zbud_curr_raw_pages);
			INIT_LIST_HEAD(&zbpg->bud_list);
			SET_SENTINEL(zbpg, ZBPG);
			zh0->size = 0; zh1->size = 0;
			tmem_oid_set_invalid(&zh0->oid);
			tmem_oid_set_invalid(&zh1->oid);
		}
	}
	return zbpg;
}

static void zbud_free_raw_page(struct zbud_page *zbpg)
{
	struct zbud_hdr *zh0 = &zbpg->buddy[0], *zh1 = &zbpg->buddy[1];

	ASSERT_SENTINEL(zbpg, ZBPG);
	BUG_ON(!list_empty(&zbpg->bud_list));
	ASSERT_SPINLOCK(&zbpg->lock);
	BUG_ON(zh0->size != 0 || tmem_oid_valid(&zh0->oid));
	BUG_ON(zh1->size != 0 || tmem_oid_valid(&zh1->oid));
	INVERT_SENTINEL(zbpg, ZBPG);
	spin_unlock(&zbpg->lock);
	spin_lock(&zbpg_unused_list_spinlock);
	list_add(&zbpg->bud_list, &zbpg_unused_list);
	zcache_zbpg_unused_list_count++;
	spin_unlock(&zbpg_unused_list_spinlock);
}

/*
 * core zbud handling routines
 */

static unsigned zbud_free(struct zbud_hdr *zh)
{
	unsigned size;

	ASSERT_SENTINEL(zh, ZBH);
	BUG_ON(!tmem_oid_valid(&zh->oid));
	size = zh->size;
	BUG_ON(zh->size == 0 || zh->size > zbud_max_buddy_size());
	zh->size = 0;
	tmem_oid_set_invalid(&zh->oid);
	INVERT_SENTINEL(zh, ZBH);
	zcache_zbud_curr_zbytes -= size;
	atomic_dec(&zcache_zbud_curr_zpages);
	return size;
}

static void zbud_free_and_delist(struct zbud_hdr *zh)
{
	unsigned chunks;
	struct zbud_hdr *zh_other;
	unsigned budnum = zbud_budnum(zh), size;
	struct zbud_page *zbpg =
		container_of(zh, struct zbud_page, buddy[budnum]);

	spin_lock(&zbpg->lock);
	if (list_empty(&zbpg->bud_list)) {
		/* ignore zombie page... see zbud_evict_pages() */
		spin_unlock(&zbpg->lock);
		return;
	}
	size = zbud_free(zh);
	ASSERT_SPINLOCK(&zbpg->lock);
	zh_other = &zbpg->buddy[(budnum == 0) ? 1 : 0];
	if (zh_other->size == 0) { /* was unbuddied: unlist and free */
		chunks = zbud_size_to_chunks(size);
		spin_lock(&zbud_budlists_spinlock);
		BUG_ON(list_empty(&zbud_unbuddied[chunks].list));
		list_del_init(&zbpg->bud_list);
		zbud_unbuddied[chunks].count--;
		spin_unlock(&zbud_budlists_spinlock);
		zbud_free_raw_page(zbpg);
	} else { /* was buddied: move remaining buddy to unbuddied list */
		chunks = zbud_size_to_chunks(zh_other->size);
		spin_lock(&zbud_budlists_spinlock);
		list_del_init(&zbpg->bud_list);
		zcache_zbud_buddied_count--;
		list_add_tail(&zbpg->bud_list, &zbud_unbuddied[chunks].list);
		zbud_unbuddied[chunks].count++;
		spin_unlock(&zbud_budlists_spinlock);
		spin_unlock(&zbpg->lock);
	}
}

static struct zbud_hdr *zbud_create(uint16_t client_id, uint16_t pool_id,
					struct tmem_oid *oid,
					uint32_t index, struct page *page,
					void *cdata, unsigned size)
{
	struct zbud_hdr *zh0, *zh1, *zh = NULL;
	struct zbud_page *zbpg = NULL, *ztmp;
	unsigned nchunks;
	char *to;
	int i, found_good_buddy = 0;

	nchunks = zbud_size_to_chunks(size);
	for (i = MAX_CHUNK - nchunks + 1; i > 0; i--) {
		spin_lock(&zbud_budlists_spinlock);
		if (!list_empty(&zbud_unbuddied[i].list)) {
			list_for_each_entry_safe(zbpg, ztmp,
				    &zbud_unbuddied[i].list, bud_list) {
				if (spin_trylock(&zbpg->lock)) {
					found_good_buddy = i;
					goto found_unbuddied;
				}
			}
		}
		spin_unlock(&zbud_budlists_spinlock);
	}
	/* didn't find a good buddy, try allocating a new page */
	zbpg = zbud_alloc_raw_page();
	if (unlikely(zbpg == NULL))
		goto out;
	/* ok, have a page; now add it to the proper unbuddied list under locks */
	spin_lock(&zbud_budlists_spinlock);
	spin_lock(&zbpg->lock);
	list_add_tail(&zbpg->bud_list, &zbud_unbuddied[nchunks].list);
	zbud_unbuddied[nchunks].count++;
	zh = &zbpg->buddy[0];
	goto init_zh;

found_unbuddied:
	ASSERT_SPINLOCK(&zbpg->lock);
	zh0 = &zbpg->buddy[0]; zh1 = &zbpg->buddy[1];
	BUG_ON(!((zh0->size == 0) ^ (zh1->size == 0)));
	if (zh0->size != 0) { /* buddy0 in use, buddy1 is vacant */
		ASSERT_SENTINEL(zh0, ZBH);
		zh = zh1;
	} else if (zh1->size != 0) { /* buddy1 in use, buddy0 is vacant */
		ASSERT_SENTINEL(zh1, ZBH);
		zh = zh0;
	} else
		BUG();
	list_del_init(&zbpg->bud_list);
	zbud_unbuddied[found_good_buddy].count--;
	list_add_tail(&zbpg->bud_list, &zbud_buddied_list);
	zcache_zbud_buddied_count++;

init_zh:
	SET_SENTINEL(zh, ZBH);
	zh->size = size;
	zh->index = index;
	zh->oid = *oid;
	zh->pool_id = pool_id;
	zh->client_id = client_id;
	to = zbud_data(zh, size);
	memcpy(to, cdata, size);
	spin_unlock(&zbpg->lock);
	spin_unlock(&zbud_budlists_spinlock);

	zbud_cumul_chunk_counts[nchunks]++;
	atomic_inc(&zcache_zbud_curr_zpages);
	zcache_zbud_cumul_zpages++;
	zcache_zbud_curr_zbytes += size;
	zcache_zbud_cumul_zbytes += size;
out:
	return zh;
}

static int zbud_decompress(struct page *page, struct zbud_hdr *zh)
{
	struct zbud_page *zbpg;
	unsigned budnum = zbud_budnum(zh);
	size_t out_len = PAGE_SIZE;
	char *to_va, *from_va;
	unsigned size;
	int ret = 0;

	zbpg = container_of(zh, struct zbud_page, buddy[budnum]);
	spin_lock(&zbpg->lock);
	if (list_empty(&zbpg->bud_list)) {
		/* ignore zombie page... see zbud_evict_pages() */
		ret = -EINVAL;
		goto out;
	}
	ASSERT_SENTINEL(zh, ZBH);
	BUG_ON(zh->size == 0 || zh->size > zbud_max_buddy_size());
	to_va = kmap_atomic(page, KM_USER0);
	size = zh->size;
	from_va = zbud_data(zh, size);
	ret = lzo1x_decompress_safe(from_va, size, to_va, &out_len);
	BUG_ON(ret != LZO_E_OK);
	BUG_ON(out_len != PAGE_SIZE);
	kunmap_atomic(to_va, KM_USER0);
out:
	spin_unlock(&zbpg->lock);
	return ret;
}

/*
 * The following routines handle shrinking of ephemeral pages by evicting
 * pages "least valuable" first.
 */

static unsigned long zcache_evicted_raw_pages;
static unsigned long zcache_evicted_buddied_pages;
static unsigned long zcache_evicted_unbuddied_pages;

static struct tmem_pool *zcache_get_pool_by_id(uint16_t cli_id,
						uint16_t poolid);
static void zcache_put_pool(struct tmem_pool *pool);

/*
 * Flush and free all zbuds in a zbpg, then free the pageframe
 */
static void zbud_evict_zbpg(struct zbud_page *zbpg)
{
	struct zbud_hdr *zh;
	int i, j;
	uint32_t pool_id[ZBUD_MAX_BUDS], client_id[ZBUD_MAX_BUDS];
	uint32_t index[ZBUD_MAX_BUDS];
	struct tmem_oid oid[ZBUD_MAX_BUDS];
	struct tmem_pool *pool;

	ASSERT_SPINLOCK(&zbpg->lock);
	BUG_ON(!list_empty(&zbpg->bud_list));
	for (i = 0, j = 0; i < ZBUD_MAX_BUDS; i++) {
		zh = &zbpg->buddy[i];
		if (zh->size) {
			client_id[j] = zh->client_id;
			pool_id[j] = zh->pool_id;
			oid[j] = zh->oid;
			index[j] = zh->index;
			j++;
			zbud_free(zh);
		}
	}
	spin_unlock(&zbpg->lock);
	for (i = 0; i < j; i++) {
		pool = zcache_get_pool_by_id(client_id[i], pool_id[i]);
		if (pool != NULL) {
			tmem_flush_page(pool, &oid[i], index[i]);
			zcache_put_pool(pool);
		}
	}
	ASSERT_SENTINEL(zbpg, ZBPG);
	spin_lock(&zbpg->lock);
	zbud_free_raw_page(zbpg);
}

/*
 * Free nr pages.  This code is funky because we want to hold the locks
 * protecting various lists for as short a time as possible, and in some
 * circumstances the list may change asynchronously when the list lock is
 * not held.  In some cases we also trylock not only to avoid waiting on a
 * page in use by another cpu, but also to avoid potential deadlock due to
 * lock inversion.
 */
static void zbud_evict_pages(int nr)
{
	struct zbud_page *zbpg;
	int i;

	/* first try freeing any pages on unused list */
retry_unused_list:
	spin_lock_bh(&zbpg_unused_list_spinlock);
	if (!list_empty(&zbpg_unused_list)) {
		/* can't walk list here, since it may change when unlocked */
		zbpg = list_first_entry(&zbpg_unused_list,
				struct zbud_page, bud_list);
		list_del_init(&zbpg->bud_list);
		zcache_zbpg_unused_list_count--;
		atomic_dec(&zcache_zbud_curr_raw_pages);
		spin_unlock_bh(&zbpg_unused_list_spinlock);
		zcache_free_page(zbpg);
		zcache_evicted_raw_pages++;
		if (--nr <= 0)
			goto out;
		goto retry_unused_list;
	}
	spin_unlock_bh(&zbpg_unused_list_spinlock);

	/* now try freeing unbuddied pages, starting with least space avail */
	for (i = 0; i < MAX_CHUNK; i++) {
retry_unbud_list_i:
		spin_lock_bh(&zbud_budlists_spinlock);
		if (list_empty(&zbud_unbuddied[i].list)) {
			spin_unlock_bh(&zbud_budlists_spinlock);
			continue;
		}
		list_for_each_entry(zbpg, &zbud_unbuddied[i].list, bud_list) {
			if (unlikely(!spin_trylock(&zbpg->lock)))
				continue;
			list_del_init(&zbpg->bud_list);
			zbud_unbuddied[i].count--;
			spin_unlock(&zbud_budlists_spinlock);
			zcache_evicted_unbuddied_pages++;
			/* want budlists unlocked when doing zbpg eviction */
			zbud_evict_zbpg(zbpg);
			local_bh_enable();
			if (--nr <= 0)
				goto out;
			goto retry_unbud_list_i;
		}
		spin_unlock_bh(&zbud_budlists_spinlock);
	}

	/* as a last resort, free buddied pages */
retry_bud_list:
	spin_lock_bh(&zbud_budlists_spinlock);
	if (list_empty(&zbud_buddied_list)) {
		spin_unlock_bh(&zbud_budlists_spinlock);
		goto out;
	}
	list_for_each_entry(zbpg, &zbud_buddied_list, bud_list) {
		if (unlikely(!spin_trylock(&zbpg->lock)))
			continue;
		list_del_init(&zbpg->bud_list);
		zcache_zbud_buddied_count--;
		spin_unlock(&zbud_budlists_spinlock);
		zcache_evicted_buddied_pages++;
		/* want budlists unlocked when doing zbpg eviction */
		zbud_evict_zbpg(zbpg);
		local_bh_enable();
		if (--nr <= 0)
			goto out;
		goto retry_bud_list;
	}
	spin_unlock_bh(&zbud_budlists_spinlock);
out:
	return;
}

static void zbud_init(void)
{
	int i;

	INIT_LIST_HEAD(&zbud_buddied_list);
	zcache_zbud_buddied_count = 0;
	for (i = 0; i < NCHUNKS; i++) {
		INIT_LIST_HEAD(&zbud_unbuddied[i].list);
		zbud_unbuddied[i].count = 0;
	}
}

#ifdef CONFIG_SYSFS
/*
 * These sysfs routines show a nice distribution of how many zbpg's are
 * currently (and have ever been placed) in each unbuddied list.  It's fun
 * to watch but can probably go away before final merge.
 */
static int zbud_show_unbuddied_list_counts(char *buf)
{
	int i;
	char *p = buf;

	for (i = 0; i < NCHUNKS; i++)
		p += sprintf(p, "%u ", zbud_unbuddied[i].count);
	return p - buf;
}

static int zbud_show_cumul_chunk_counts(char *buf)
{
	unsigned long i, chunks = 0, total_chunks = 0, sum_total_chunks = 0;
	unsigned long total_chunks_lte_21 = 0, total_chunks_lte_32 = 0;
	unsigned long total_chunks_lte_42 = 0;
	char *p = buf;

	for (i = 0; i < NCHUNKS; i++) {
		p += sprintf(p, "%lu ", zbud_cumul_chunk_counts[i]);
		chunks += zbud_cumul_chunk_counts[i];
		total_chunks += zbud_cumul_chunk_counts[i];
		sum_total_chunks += i * zbud_cumul_chunk_counts[i];
		if (i == 21)
			total_chunks_lte_21 = total_chunks;
		if (i == 32)
			total_chunks_lte_32 = total_chunks;
		if (i == 42)
			total_chunks_lte_42 = total_chunks;
	}
	p += sprintf(p, "<=21:%lu <=32:%lu <=42:%lu, mean:%lu\n",
		total_chunks_lte_21, total_chunks_lte_32, total_chunks_lte_42,
		chunks == 0 ? 0 : sum_total_chunks / chunks);
	return p - buf;
}
#endif

/**********
 * This "zv" PAM implementation combines the TLSF-based xvMalloc
 * with lzo1x compression to maximize the amount of data that can
 * be packed into a physical page.
 *
 * Zv represents a PAM page with the index and object (plus a "size" value
 * necessary for decompression) immediately preceding the compressed data.
 */

#define ZVH_SENTINEL  0x43214321

struct zv_hdr {
	uint32_t pool_id;
	struct tmem_oid oid;
	uint32_t index;
	DECL_SENTINEL
};

/* rudimentary policy limits */
/* total number of persistent pages may not exceed this percentage */
static unsigned int zv_page_count_policy_percent = 75;
/*
 * byte count defining poor compression; pages with greater zsize will be
 * rejected
 */
static unsigned int zv_max_zsize = (PAGE_SIZE / 8) * 7;
/*
 * byte count defining poor *mean* compression; pages with greater zsize
 * will be rejected until sufficient better-compressed pages are accepted
 * driving the mean below this threshold
 */
static unsigned int zv_max_mean_zsize = (PAGE_SIZE / 8) * 5;
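
/*
 * Worked example (illustrative, assuming PAGE_SIZE == 4096): with the
 * defaults above, zv_max_zsize == 3584 and zv_max_mean_zsize == 2560,
 * i.e. an individual persistent page may compress to at most 7/8 of a
 * page, and the running mean across all stored persistent pages must
 * stay at or below 5/8 of a page.
 */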

static unsigned long zv_curr_dist_counts[NCHUNKS];
static unsigned long zv_cumul_dist_counts[NCHUNKS];

static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id,
				struct tmem_oid *oid, uint32_t index,
				void *cdata, unsigned clen)
{
	struct page *page;
	struct zv_hdr *zv = NULL;
	uint32_t offset;
	int alloc_size = clen + sizeof(struct zv_hdr);
	int chunks = (alloc_size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT;
	int ret;

	BUG_ON(!irqs_disabled());
	BUG_ON(chunks >= NCHUNKS);
	ret = xv_malloc(xvpool, alloc_size,
			&page, &offset, ZCACHE_GFP_MASK);
	if (unlikely(ret))
		goto out;
	zv_curr_dist_counts[chunks]++;
	zv_cumul_dist_counts[chunks]++;
	zv = kmap_atomic(page, KM_USER0) + offset;
	zv->index = index;
	zv->oid = *oid;
	zv->pool_id = pool_id;
	SET_SENTINEL(zv, ZVH);
	memcpy((char *)zv + sizeof(struct zv_hdr), cdata, clen);
	kunmap_atomic(zv, KM_USER0);
out:
	return zv;
}

static void zv_free(struct xv_pool *xvpool, struct zv_hdr *zv)
{
	unsigned long flags;
	struct page *page;
	uint32_t offset;
	uint16_t size = xv_get_object_size(zv);
	int chunks = (size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT;

	ASSERT_SENTINEL(zv, ZVH);
	BUG_ON(chunks >= NCHUNKS);
	zv_curr_dist_counts[chunks]--;
	size -= sizeof(*zv);
	BUG_ON(size == 0);
	INVERT_SENTINEL(zv, ZVH);
	page = virt_to_page(zv);
	offset = (unsigned long)zv & ~PAGE_MASK;
	local_irq_save(flags);
	xv_free(xvpool, page, offset);
	local_irq_restore(flags);
}

static void zv_decompress(struct page *page, struct zv_hdr *zv)
{
	size_t clen = PAGE_SIZE;
	char *to_va;
	unsigned size;
	int ret;

	ASSERT_SENTINEL(zv, ZVH);
	size = xv_get_object_size(zv) - sizeof(*zv);
	BUG_ON(size == 0);
	to_va = kmap_atomic(page, KM_USER0);
	ret = lzo1x_decompress_safe((char *)zv + sizeof(*zv),
					size, to_va, &clen);
	kunmap_atomic(to_va, KM_USER0);
	BUG_ON(ret != LZO_E_OK);
	BUG_ON(clen != PAGE_SIZE);
}

#ifdef CONFIG_SYSFS
/*
 * show a distribution of compression stats for zv pages.
 */

static int zv_curr_dist_counts_show(char *buf)
{
	unsigned long i, n, chunks = 0, sum_total_chunks = 0;
	char *p = buf;

	for (i = 0; i < NCHUNKS; i++) {
		n = zv_curr_dist_counts[i];
		p += sprintf(p, "%lu ", n);
		chunks += n;
		sum_total_chunks += i * n;
	}
	p += sprintf(p, "mean:%lu\n",
		chunks == 0 ? 0 : sum_total_chunks / chunks);
	return p - buf;
}

static int zv_cumul_dist_counts_show(char *buf)
{
	unsigned long i, n, chunks = 0, sum_total_chunks = 0;
	char *p = buf;

	for (i = 0; i < NCHUNKS; i++) {
		n = zv_cumul_dist_counts[i];
		p += sprintf(p, "%lu ", n);
		chunks += n;
		sum_total_chunks += i * n;
	}
	p += sprintf(p, "mean:%lu\n",
		chunks == 0 ? 0 : sum_total_chunks / chunks);
	return p - buf;
}

/*
 * setting zv_max_zsize via sysfs causes all persistent (e.g. swap)
 * pages that don't compress to less than this value (including metadata
 * overhead) to be rejected.  We don't allow the value to get too close
 * to PAGE_SIZE.
 */
static ssize_t zv_max_zsize_show(struct kobject *kobj,
				struct kobj_attribute *attr,
				char *buf)
{
	return sprintf(buf, "%u\n", zv_max_zsize);
}

static ssize_t zv_max_zsize_store(struct kobject *kobj,
				struct kobj_attribute *attr,
				const char *buf, size_t count)
{
	unsigned long val;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	err = kstrtoul(buf, 10, &val);
	if (err || (val == 0) || (val > (PAGE_SIZE / 8) * 7))
		return -EINVAL;
	zv_max_zsize = val;
	return count;
}

/*
 * setting zv_max_mean_zsize via sysfs causes all persistent (e.g. swap)
 * pages that don't compress to less than this value (including metadata
 * overhead) to be rejected UNLESS the mean compression is also smaller
 * than this value.  In other words, we are load-balancing-by-zsize the
 * accepted pages.  Again, we don't allow the value to get too close
 * to PAGE_SIZE.
 */
static ssize_t zv_max_mean_zsize_show(struct kobject *kobj,
				struct kobj_attribute *attr,
				char *buf)
{
	return sprintf(buf, "%u\n", zv_max_mean_zsize);
}

static ssize_t zv_max_mean_zsize_store(struct kobject *kobj,
				struct kobj_attribute *attr,
				const char *buf, size_t count)
{
	unsigned long val;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	err = kstrtoul(buf, 10, &val);
	if (err || (val == 0) || (val > (PAGE_SIZE / 8) * 7))
		return -EINVAL;
	zv_max_mean_zsize = val;
	return count;
}

/*
 * setting zv_page_count_policy_percent via sysfs sets an upper bound on the
 * number of persistent (e.g. swap) pages that will be retained, namely:
 *     (zv_page_count_policy_percent * totalram_pages) / 100
 * when that limit is reached, further puts will be rejected (until
 * some pages have been flushed).  Note that, due to compression,
 * this number may exceed 100; it defaults to 75 and we set an
 * arbitrary limit of 150.  A poor choice will almost certainly result
 * in OOMs, so this value should only be changed prudently.
 */
static ssize_t zv_page_count_policy_percent_show(struct kobject *kobj,
				struct kobj_attribute *attr,
				char *buf)
{
	return sprintf(buf, "%u\n", zv_page_count_policy_percent);
}

static ssize_t zv_page_count_policy_percent_store(struct kobject *kobj,
				struct kobj_attribute *attr,
				const char *buf, size_t count)
{
	unsigned long val;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	err = kstrtoul(buf, 10, &val);
	if (err || (val == 0) || (val > 150))
		return -EINVAL;
	zv_page_count_policy_percent = val;
	return count;
}
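
/*
 * Worked example (illustrative): on a machine with 2 GiB of RAM and 4 KiB
 * pages, totalram_pages is roughly 524288, so the default policy of 75%
 * allows up to about 393216 compressed persistent pages to be retained
 * before further persistent (frontswap) puts are rejected.
 */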

static struct kobj_attribute zcache_zv_max_zsize_attr = {
	.attr = { .name = "zv_max_zsize", .mode = 0644 },
	.show = zv_max_zsize_show,
	.store = zv_max_zsize_store,
};

static struct kobj_attribute zcache_zv_max_mean_zsize_attr = {
	.attr = { .name = "zv_max_mean_zsize", .mode = 0644 },
	.show = zv_max_mean_zsize_show,
	.store = zv_max_mean_zsize_store,
};

static struct kobj_attribute zcache_zv_page_count_policy_percent_attr = {
	.attr = { .name = "zv_page_count_policy_percent",
		  .mode = 0644 },
	.show = zv_page_count_policy_percent_show,
	.store = zv_page_count_policy_percent_store,
};
#endif

/*
 * zcache core code starts here
 */

/* useful stats not collected by cleancache or frontswap */
static unsigned long zcache_flush_total;
static unsigned long zcache_flush_found;
static unsigned long zcache_flobj_total;
static unsigned long zcache_flobj_found;
static unsigned long zcache_failed_eph_puts;
static unsigned long zcache_failed_pers_puts;

/*
 * Tmem operations assume the poolid implies the invoking client.
 * Zcache only has one client (the kernel itself): LOCAL_CLIENT.
 * RAMster has each client numbered by cluster node, and a KVM version
 * of zcache would have one client per guest and each client might
 * have a poolid==N.
 */
static struct tmem_pool *zcache_get_pool_by_id(uint16_t cli_id, uint16_t poolid)
{
	struct tmem_pool *pool = NULL;
	struct zcache_client *cli = NULL;

	if (cli_id == LOCAL_CLIENT)
		cli = &zcache_host;
	else {
		if (cli_id >= MAX_CLIENTS)
			goto out;
		cli = &zcache_clients[cli_id];
		if (cli == NULL)
			goto out;
		atomic_inc(&cli->refcount);
	}
	if (poolid < MAX_POOLS_PER_CLIENT) {
		pool = cli->tmem_pools[poolid];
		if (pool != NULL)
			atomic_inc(&pool->refcount);
	}
out:
	return pool;
}

static void zcache_put_pool(struct tmem_pool *pool)
{
	struct zcache_client *cli = NULL;

	if (pool == NULL)
		BUG();
	cli = pool->client;
	atomic_dec(&pool->refcount);
	atomic_dec(&cli->refcount);
}

int zcache_new_client(uint16_t cli_id)
{
	struct zcache_client *cli = NULL;
	int ret = -1;

	if (cli_id == LOCAL_CLIENT)
		cli = &zcache_host;
	else if ((unsigned int)cli_id < MAX_CLIENTS)
		cli = &zcache_clients[cli_id];
	if (cli == NULL)
		goto out;
	if (cli->allocated)
		goto out;
	cli->allocated = true;
#ifdef CONFIG_FRONTSWAP
	cli->xvpool = xv_create_pool();
	if (cli->xvpool == NULL)
		goto out;
#endif
	ret = 0;
out:
	return ret;
}

/* counters for debugging */
static unsigned long zcache_failed_get_free_pages;
static unsigned long zcache_failed_alloc;
static unsigned long zcache_put_to_flush;

/*
 * for now, use named slabs so we can easily track usage; later we can
 * either just use kmalloc, or perhaps add a slab-like allocator
 * to more carefully manage total memory utilization
 */
static struct kmem_cache *zcache_objnode_cache;
static struct kmem_cache *zcache_obj_cache;
static atomic_t zcache_curr_obj_count = ATOMIC_INIT(0);
static unsigned long zcache_curr_obj_count_max;
static atomic_t zcache_curr_objnode_count = ATOMIC_INIT(0);
static unsigned long zcache_curr_objnode_count_max;

/*
 * to avoid memory allocation recursion (e.g. due to direct reclaim), we
 * preload all necessary data structures so the hostops callbacks never
 * actually do a malloc
 */
struct zcache_preload {
	void *page;
	struct tmem_obj *obj;
	int nr;
	struct tmem_objnode *objnodes[OBJNODE_TREE_MAX_PATH];
};
static DEFINE_PER_CPU(struct zcache_preload, zcache_preloads) = { 0, };

static int zcache_do_preload(struct tmem_pool *pool)
{
	struct zcache_preload *kp;
	struct tmem_objnode *objnode;
	struct tmem_obj *obj;
	void *page;
	int ret = -ENOMEM;

	if (unlikely(zcache_objnode_cache == NULL))
		goto out;
	if (unlikely(zcache_obj_cache == NULL))
		goto out;
	preempt_disable();
	kp = &__get_cpu_var(zcache_preloads);
	while (kp->nr < ARRAY_SIZE(kp->objnodes)) {
		preempt_enable_no_resched();
		objnode = kmem_cache_alloc(zcache_objnode_cache,
				ZCACHE_GFP_MASK);
		if (unlikely(objnode == NULL)) {
			zcache_failed_alloc++;
			goto out;
		}
		preempt_disable();
		kp = &__get_cpu_var(zcache_preloads);
		if (kp->nr < ARRAY_SIZE(kp->objnodes))
			kp->objnodes[kp->nr++] = objnode;
		else
			kmem_cache_free(zcache_objnode_cache, objnode);
	}
	preempt_enable_no_resched();
	obj = kmem_cache_alloc(zcache_obj_cache, ZCACHE_GFP_MASK);
	if (unlikely(obj == NULL)) {
		zcache_failed_alloc++;
		goto out;
	}
	page = (void *)__get_free_page(ZCACHE_GFP_MASK);
	if (unlikely(page == NULL)) {
		zcache_failed_get_free_pages++;
		kmem_cache_free(zcache_obj_cache, obj);
		goto out;
	}
	preempt_disable();
	kp = &__get_cpu_var(zcache_preloads);
	if (kp->obj == NULL)
		kp->obj = obj;
	else
		kmem_cache_free(zcache_obj_cache, obj);
	if (kp->page == NULL)
		kp->page = page;
	else
		free_page((unsigned long)page);
	ret = 0;
out:
	return ret;
}
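
/*
 * Illustrative only (a minimal sketch, not part of the driver): the
 * intended calling pattern for zcache_do_preload(), mirroring
 * zcache_put_page() further below.  The function name here is
 * hypothetical and the block is not compiled.
 */
#if 0
static int zcache_preload_usage_sketch(struct tmem_pool *pool,
				struct tmem_oid *oidp, uint32_t index,
				struct page *page)
{
	int ret = -1;

	/* on success, zcache_do_preload() returns 0 with preemption
	 * disabled so the per-cpu stash cannot be stolen before use */
	if (zcache_do_preload(pool) == 0) {
		ret = tmem_put(pool, oidp, index, (char *)(page),
				PAGE_SIZE, 0, is_ephemeral(pool));
		preempt_enable_no_resched();
	}
	return ret;
}
#endif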

static void *zcache_get_free_page(void)
{
	struct zcache_preload *kp;
	void *page;

	kp = &__get_cpu_var(zcache_preloads);
	page = kp->page;
	BUG_ON(page == NULL);
	kp->page = NULL;
	return page;
}

static void zcache_free_page(void *p)
{
	free_page((unsigned long)p);
}

/*
 * zcache implementation for tmem host ops
 */

static struct tmem_objnode *zcache_objnode_alloc(struct tmem_pool *pool)
{
	struct tmem_objnode *objnode = NULL;
	unsigned long count;
	struct zcache_preload *kp;

	kp = &__get_cpu_var(zcache_preloads);
	if (kp->nr <= 0)
		goto out;
	objnode = kp->objnodes[kp->nr - 1];
	BUG_ON(objnode == NULL);
	kp->objnodes[kp->nr - 1] = NULL;
	kp->nr--;
	count = atomic_inc_return(&zcache_curr_objnode_count);
	if (count > zcache_curr_objnode_count_max)
		zcache_curr_objnode_count_max = count;
out:
	return objnode;
}

static void zcache_objnode_free(struct tmem_objnode *objnode,
				struct tmem_pool *pool)
{
	atomic_dec(&zcache_curr_objnode_count);
	BUG_ON(atomic_read(&zcache_curr_objnode_count) < 0);
	kmem_cache_free(zcache_objnode_cache, objnode);
}

static struct tmem_obj *zcache_obj_alloc(struct tmem_pool *pool)
{
	struct tmem_obj *obj = NULL;
	unsigned long count;
	struct zcache_preload *kp;

	kp = &__get_cpu_var(zcache_preloads);
	obj = kp->obj;
	BUG_ON(obj == NULL);
	kp->obj = NULL;
	count = atomic_inc_return(&zcache_curr_obj_count);
	if (count > zcache_curr_obj_count_max)
		zcache_curr_obj_count_max = count;
	return obj;
}

static void zcache_obj_free(struct tmem_obj *obj, struct tmem_pool *pool)
{
	atomic_dec(&zcache_curr_obj_count);
	BUG_ON(atomic_read(&zcache_curr_obj_count) < 0);
	kmem_cache_free(zcache_obj_cache, obj);
}

static struct tmem_hostops zcache_hostops = {
	.obj_alloc = zcache_obj_alloc,
	.obj_free = zcache_obj_free,
	.objnode_alloc = zcache_objnode_alloc,
	.objnode_free = zcache_objnode_free,
};

/*
 * zcache implementations for PAM page descriptor ops
 */

static atomic_t zcache_curr_eph_pampd_count = ATOMIC_INIT(0);
static unsigned long zcache_curr_eph_pampd_count_max;
static atomic_t zcache_curr_pers_pampd_count = ATOMIC_INIT(0);
static unsigned long zcache_curr_pers_pampd_count_max;

/* forward reference */
static int zcache_compress(struct page *from, void **out_va, size_t *out_len);

static void *zcache_pampd_create(char *data, size_t size, bool raw, int eph,
				struct tmem_pool *pool, struct tmem_oid *oid,
				uint32_t index)
{
	void *pampd = NULL, *cdata;
	size_t clen;
	int ret;
	unsigned long count;
	struct page *page = (struct page *)(data);
	struct zcache_client *cli = pool->client;
	uint16_t client_id = get_client_id_from_client(cli);
	unsigned long zv_mean_zsize;
	unsigned long curr_pers_pampd_count;
	u64 total_zsize;

	if (eph) {
		/* zcache_compress() returns 1 on success, 0 on failure */
		ret = zcache_compress(page, &cdata, &clen);
		if (ret == 0)
			goto out;
		if (clen == 0 || clen > zbud_max_buddy_size()) {
			zcache_compress_poor++;
			goto out;
		}
		pampd = (void *)zbud_create(client_id, pool->pool_id, oid,
						index, page, cdata, clen);
		if (pampd != NULL) {
			count = atomic_inc_return(&zcache_curr_eph_pampd_count);
			if (count > zcache_curr_eph_pampd_count_max)
				zcache_curr_eph_pampd_count_max = count;
		}
	} else {
		curr_pers_pampd_count =
			atomic_read(&zcache_curr_pers_pampd_count);
		if (curr_pers_pampd_count >
		    (zv_page_count_policy_percent * totalram_pages) / 100)
			goto out;
		ret = zcache_compress(page, &cdata, &clen);
		if (ret == 0)
			goto out;
		/* reject if compression is too poor */
		if (clen > zv_max_zsize) {
			zcache_compress_poor++;
			goto out;
		}
		/* reject if mean compression is too poor */
		if ((clen > zv_max_mean_zsize) && (curr_pers_pampd_count > 0)) {
			total_zsize = xv_get_total_size_bytes(cli->xvpool);
			zv_mean_zsize = div_u64(total_zsize,
						curr_pers_pampd_count);
			if (zv_mean_zsize > zv_max_mean_zsize) {
				zcache_mean_compress_poor++;
				goto out;
			}
		}
		pampd = (void *)zv_create(cli->xvpool, pool->pool_id,
						oid, index, cdata, clen);
		if (pampd == NULL)
			goto out;
		count = atomic_inc_return(&zcache_curr_pers_pampd_count);
		if (count > zcache_curr_pers_pampd_count_max)
			zcache_curr_pers_pampd_count_max = count;
	}
out:
	return pampd;
}
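
/*
 * Worked example (illustrative, assuming 4 KiB pages and the default
 * thresholds): a persistent page compressing to 3000 bytes passes the
 * zv_max_zsize (3584) test but exceeds zv_max_mean_zsize (2560), so it
 * is only accepted if the current mean compressed size, i.e.
 * xv_get_total_size_bytes() / curr_pers_pampd_count, is still <= 2560;
 * otherwise zcache_mean_compress_poor is bumped and the put fails.
 */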

/*
 * fill the pageframe corresponding to the struct page with the data
 * from the passed pampd
 */
static int zcache_pampd_get_data(char *data, size_t *bufsize, bool raw,
					void *pampd, struct tmem_pool *pool,
					struct tmem_oid *oid, uint32_t index)
{
	int ret = 0;

	BUG_ON(is_ephemeral(pool));
	zv_decompress((struct page *)(data), pampd);
	return ret;
}

/*
 * fill the pageframe corresponding to the struct page with the data
 * from the passed pampd, then free the pampd (ephemeral pages only)
 */
static int zcache_pampd_get_data_and_free(char *data, size_t *bufsize, bool raw,
					void *pampd, struct tmem_pool *pool,
					struct tmem_oid *oid, uint32_t index)
{
	int ret = 0;

	BUG_ON(!is_ephemeral(pool));
	zbud_decompress((struct page *)(data), pampd);
	zbud_free_and_delist((struct zbud_hdr *)pampd);
	atomic_dec(&zcache_curr_eph_pampd_count);
	return ret;
}

/*
 * free the pampd and remove it from any zcache lists
 * pampd must no longer be pointed to from any tmem data structures!
 */
static void zcache_pampd_free(void *pampd, struct tmem_pool *pool,
				struct tmem_oid *oid, uint32_t index)
{
	struct zcache_client *cli = pool->client;

	if (is_ephemeral(pool)) {
		zbud_free_and_delist((struct zbud_hdr *)pampd);
		atomic_dec(&zcache_curr_eph_pampd_count);
		BUG_ON(atomic_read(&zcache_curr_eph_pampd_count) < 0);
	} else {
		zv_free(cli->xvpool, (struct zv_hdr *)pampd);
		atomic_dec(&zcache_curr_pers_pampd_count);
		BUG_ON(atomic_read(&zcache_curr_pers_pampd_count) < 0);
	}
}

static void zcache_pampd_free_obj(struct tmem_pool *pool, struct tmem_obj *obj)
{
}

static void zcache_pampd_new_obj(struct tmem_obj *obj)
{
}

static int zcache_pampd_replace_in_obj(void *pampd, struct tmem_obj *obj)
{
	return -1;
}

static bool zcache_pampd_is_remote(void *pampd)
{
	return 0;
}

static struct tmem_pamops zcache_pamops = {
	.create = zcache_pampd_create,
	.get_data = zcache_pampd_get_data,
	.get_data_and_free = zcache_pampd_get_data_and_free,
	.free = zcache_pampd_free,
	.free_obj = zcache_pampd_free_obj,
	.new_obj = zcache_pampd_new_obj,
	.replace_in_obj = zcache_pampd_replace_in_obj,
	.is_remote = zcache_pampd_is_remote,
};

/*
 * zcache compression/decompression and related per-cpu stuff
 */

#define LZO_WORKMEM_BYTES LZO1X_1_MEM_COMPRESS
#define LZO_DSTMEM_PAGE_ORDER 1
static DEFINE_PER_CPU(unsigned char *, zcache_workmem);
static DEFINE_PER_CPU(unsigned char *, zcache_dstmem);

static int zcache_compress(struct page *from, void **out_va, size_t *out_len)
{
	int ret = 0;
	unsigned char *dmem = __get_cpu_var(zcache_dstmem);
	unsigned char *wmem = __get_cpu_var(zcache_workmem);
	char *from_va;

	BUG_ON(!irqs_disabled());
	if (unlikely(dmem == NULL || wmem == NULL))
		goto out;	/* no buffer, so can't compress */
	from_va = kmap_atomic(from, KM_USER0);
	mb();
	ret = lzo1x_1_compress(from_va, PAGE_SIZE, dmem, out_len, wmem);
	BUG_ON(ret != LZO_E_OK);
	*out_va = dmem;
	kunmap_atomic(from_va, KM_USER0);
	ret = 1;
out:
	return ret;
}


static int zcache_cpu_notifier(struct notifier_block *nb,
				unsigned long action, void *pcpu)
{
	int cpu = (long)pcpu;
	struct zcache_preload *kp;

	switch (action) {
	case CPU_UP_PREPARE:
		per_cpu(zcache_dstmem, cpu) = (void *)__get_free_pages(
			GFP_KERNEL | __GFP_REPEAT,
			LZO_DSTMEM_PAGE_ORDER);
		per_cpu(zcache_workmem, cpu) =
			kzalloc(LZO1X_MEM_COMPRESS,
				GFP_KERNEL | __GFP_REPEAT);
		break;
	case CPU_DEAD:
	case CPU_UP_CANCELED:
		free_pages((unsigned long)per_cpu(zcache_dstmem, cpu),
			LZO_DSTMEM_PAGE_ORDER);
		per_cpu(zcache_dstmem, cpu) = NULL;
		kfree(per_cpu(zcache_workmem, cpu));
		per_cpu(zcache_workmem, cpu) = NULL;
		kp = &per_cpu(zcache_preloads, cpu);
		while (kp->nr) {
			kmem_cache_free(zcache_objnode_cache,
					kp->objnodes[kp->nr - 1]);
			kp->objnodes[kp->nr - 1] = NULL;
			kp->nr--;
		}
		if (kp->obj) {
			kmem_cache_free(zcache_obj_cache, kp->obj);
			kp->obj = NULL;
		}
		if (kp->page) {
			free_page((unsigned long)kp->page);
			kp->page = NULL;
		}
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block zcache_cpu_notifier_block = {
	.notifier_call = zcache_cpu_notifier
};

#ifdef CONFIG_SYSFS
#define ZCACHE_SYSFS_RO(_name) \
	static ssize_t zcache_##_name##_show(struct kobject *kobj, \
				struct kobj_attribute *attr, char *buf) \
	{ \
		return sprintf(buf, "%lu\n", zcache_##_name); \
	} \
	static struct kobj_attribute zcache_##_name##_attr = { \
		.attr = { .name = __stringify(_name), .mode = 0444 }, \
		.show = zcache_##_name##_show, \
	}

#define ZCACHE_SYSFS_RO_ATOMIC(_name) \
	static ssize_t zcache_##_name##_show(struct kobject *kobj, \
				struct kobj_attribute *attr, char *buf) \
	{ \
		return sprintf(buf, "%d\n", atomic_read(&zcache_##_name)); \
	} \
	static struct kobj_attribute zcache_##_name##_attr = { \
		.attr = { .name = __stringify(_name), .mode = 0444 }, \
		.show = zcache_##_name##_show, \
	}

#define ZCACHE_SYSFS_RO_CUSTOM(_name, _func) \
	static ssize_t zcache_##_name##_show(struct kobject *kobj, \
				struct kobj_attribute *attr, char *buf) \
	{ \
		return _func(buf); \
	} \
	static struct kobj_attribute zcache_##_name##_attr = { \
		.attr = { .name = __stringify(_name), .mode = 0444 }, \
		.show = zcache_##_name##_show, \
	}
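
/*
 * For reference, ZCACHE_SYSFS_RO(flush_total) below expands (modulo line
 * breaks) to roughly:
 *
 *	static ssize_t zcache_flush_total_show(struct kobject *kobj,
 *				struct kobj_attribute *attr, char *buf)
 *	{
 *		return sprintf(buf, "%lu\n", zcache_flush_total);
 *	}
 *	static struct kobj_attribute zcache_flush_total_attr = {
 *		.attr = { .name = "flush_total", .mode = 0444 },
 *		.show = zcache_flush_total_show,
 *	};
 *
 * i.e. each counter gets a read-only sysfs attribute file named after the
 * counter, collected into zcache_attr_group further below.
 */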
1396
1397ZCACHE_SYSFS_RO(curr_obj_count_max);
1398ZCACHE_SYSFS_RO(curr_objnode_count_max);
1399ZCACHE_SYSFS_RO(flush_total);
1400ZCACHE_SYSFS_RO(flush_found);
1401ZCACHE_SYSFS_RO(flobj_total);
1402ZCACHE_SYSFS_RO(flobj_found);
1403ZCACHE_SYSFS_RO(failed_eph_puts);
1404ZCACHE_SYSFS_RO(failed_pers_puts);
1405ZCACHE_SYSFS_RO(zbud_curr_zbytes);
1406ZCACHE_SYSFS_RO(zbud_cumul_zpages);
1407ZCACHE_SYSFS_RO(zbud_cumul_zbytes);
1408ZCACHE_SYSFS_RO(zbud_buddied_count);
1409ZCACHE_SYSFS_RO(zbpg_unused_list_count);
1410ZCACHE_SYSFS_RO(evicted_raw_pages);
1411ZCACHE_SYSFS_RO(evicted_unbuddied_pages);
1412ZCACHE_SYSFS_RO(evicted_buddied_pages);
1413ZCACHE_SYSFS_RO(failed_get_free_pages);
1414ZCACHE_SYSFS_RO(failed_alloc);
1415ZCACHE_SYSFS_RO(put_to_flush);
Dan Magenheimer9cc06bf2011-02-06 19:26:08 -08001416ZCACHE_SYSFS_RO(compress_poor);
Dan Magenheimer966b9012011-07-07 07:37:19 -07001417ZCACHE_SYSFS_RO(mean_compress_poor);
Dan Magenheimer9cc06bf2011-02-06 19:26:08 -08001418ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_raw_pages);
1419ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_zpages);
1420ZCACHE_SYSFS_RO_ATOMIC(curr_obj_count);
1421ZCACHE_SYSFS_RO_ATOMIC(curr_objnode_count);
1422ZCACHE_SYSFS_RO_CUSTOM(zbud_unbuddied_list_counts,
1423 zbud_show_unbuddied_list_counts);
1424ZCACHE_SYSFS_RO_CUSTOM(zbud_cumul_chunk_counts,
1425 zbud_show_cumul_chunk_counts);
Dan Magenheimer966b9012011-07-07 07:37:19 -07001426ZCACHE_SYSFS_RO_CUSTOM(zv_curr_dist_counts,
1427 zv_curr_dist_counts_show);
1428ZCACHE_SYSFS_RO_CUSTOM(zv_cumul_dist_counts,
1429 zv_cumul_dist_counts_show);
Dan Magenheimer9cc06bf2011-02-06 19:26:08 -08001430
1431static struct attribute *zcache_attrs[] = {
1432 &zcache_curr_obj_count_attr.attr,
1433 &zcache_curr_obj_count_max_attr.attr,
1434 &zcache_curr_objnode_count_attr.attr,
1435 &zcache_curr_objnode_count_max_attr.attr,
1436 &zcache_flush_total_attr.attr,
1437 &zcache_flobj_total_attr.attr,
1438 &zcache_flush_found_attr.attr,
1439 &zcache_flobj_found_attr.attr,
1440 &zcache_failed_eph_puts_attr.attr,
1441 &zcache_failed_pers_puts_attr.attr,
1442 &zcache_compress_poor_attr.attr,
Dan Magenheimer966b9012011-07-07 07:37:19 -07001443 &zcache_mean_compress_poor_attr.attr,
Dan Magenheimer9cc06bf2011-02-06 19:26:08 -08001444 &zcache_zbud_curr_raw_pages_attr.attr,
1445 &zcache_zbud_curr_zpages_attr.attr,
1446 &zcache_zbud_curr_zbytes_attr.attr,
1447 &zcache_zbud_cumul_zpages_attr.attr,
1448 &zcache_zbud_cumul_zbytes_attr.attr,
1449 &zcache_zbud_buddied_count_attr.attr,
1450 &zcache_zbpg_unused_list_count_attr.attr,
1451 &zcache_evicted_raw_pages_attr.attr,
1452 &zcache_evicted_unbuddied_pages_attr.attr,
1453 &zcache_evicted_buddied_pages_attr.attr,
1454 &zcache_failed_get_free_pages_attr.attr,
1455 &zcache_failed_alloc_attr.attr,
1456 &zcache_put_to_flush_attr.attr,
Dan Magenheimer9cc06bf2011-02-06 19:26:08 -08001457 &zcache_zbud_unbuddied_list_counts_attr.attr,
1458 &zcache_zbud_cumul_chunk_counts_attr.attr,
Dan Magenheimer966b9012011-07-07 07:37:19 -07001459 &zcache_zv_curr_dist_counts_attr.attr,
1460 &zcache_zv_cumul_dist_counts_attr.attr,
1461 &zcache_zv_max_zsize_attr.attr,
1462 &zcache_zv_max_mean_zsize_attr.attr,
1463 &zcache_zv_page_count_policy_percent_attr.attr,
Dan Magenheimer9cc06bf2011-02-06 19:26:08 -08001464 NULL,
1465};
1466
1467static struct attribute_group zcache_attr_group = {
1468 .attrs = zcache_attrs,
1469 .name = "zcache",
1470};
1471
1472#endif /* CONFIG_SYSFS */
1473/*
1474 * When zcache is disabled ("frozen"), pools can be created and destroyed,
1475 * but all puts (and thus all other operations that require memory allocation)
1476 * must fail. If zcache is unfrozen, accepts puts, then frozen again,
1477 * data consistency requires all puts while frozen to be converted into
1478 * flushes.
1479 */
1480static bool zcache_freeze;
1481
1482/*
1483 * zcache shrinker interface (only useful for ephemeral pages, so zbud only)
1484 */
Ying Han1495f232011-05-24 17:12:27 -07001485static int shrink_zcache_memory(struct shrinker *shrink,
1486 struct shrink_control *sc)
Dan Magenheimer9cc06bf2011-02-06 19:26:08 -08001487{
1488 int ret = -1;
Ying Han1495f232011-05-24 17:12:27 -07001489 int nr = sc->nr_to_scan;
1490 gfp_t gfp_mask = sc->gfp_mask;
Dan Magenheimer9cc06bf2011-02-06 19:26:08 -08001491
1492 if (nr >= 0) {
1493 if (!(gfp_mask & __GFP_FS))
1494 /* does this case really need to be skipped? */
1495 goto out;
Seth Jennings00bf2562011-10-12 14:41:00 -05001496 zbud_evict_pages(nr);
Dan Magenheimer9cc06bf2011-02-06 19:26:08 -08001497 }
1498 ret = (int)atomic_read(&zcache_zbud_curr_raw_pages);
1499out:
1500 return ret;
1501}
1502
1503static struct shrinker zcache_shrinker = {
1504 .shrink = shrink_zcache_memory,
1505 .seeks = DEFAULT_SEEKS,
1506};
1507
1508/*
1509 * zcache shims between cleancache/frontswap ops and tmem
1510 */
1511
Dan Magenheimer966b9012011-07-07 07:37:19 -07001512static int zcache_put_page(int cli_id, int pool_id, struct tmem_oid *oidp,
Dan Magenheimer9cc06bf2011-02-06 19:26:08 -08001513 uint32_t index, struct page *page)
1514{
1515 struct tmem_pool *pool;
1516 int ret = -1;
1517
1518 BUG_ON(!irqs_disabled());
Dan Magenheimer966b9012011-07-07 07:37:19 -07001519 pool = zcache_get_pool_by_id(cli_id, pool_id);
Dan Magenheimer9cc06bf2011-02-06 19:26:08 -08001520 if (unlikely(pool == NULL))
1521 goto out;
1522 if (!zcache_freeze && zcache_do_preload(pool) == 0) {
1523 /* preload does preempt_disable on success */
Seth Jenningsc5f5c4d2011-08-10 12:56:49 -05001524 ret = tmem_put(pool, oidp, index, (char *)(page),
Dan Magenheimer966b9012011-07-07 07:37:19 -07001525 PAGE_SIZE, 0, is_ephemeral(pool));
Dan Magenheimer9cc06bf2011-02-06 19:26:08 -08001526 if (ret < 0) {
1527 if (is_ephemeral(pool))
1528 zcache_failed_eph_puts++;
1529 else
1530 zcache_failed_pers_puts++;
1531 }
1532 zcache_put_pool(pool);
1533 preempt_enable_no_resched();
1534 } else {
1535 zcache_put_to_flush++;
1536 if (atomic_read(&pool->obj_count) > 0)
1537 /* the put fails whether the flush succeeds or not */
1538 (void)tmem_flush_page(pool, oidp, index);
1539 zcache_put_pool(pool);
1540 }
1541out:
1542 return ret;
1543}
1544
Dan Magenheimer966b9012011-07-07 07:37:19 -07001545static int zcache_get_page(int cli_id, int pool_id, struct tmem_oid *oidp,
Dan Magenheimer9cc06bf2011-02-06 19:26:08 -08001546 uint32_t index, struct page *page)
1547{
1548 struct tmem_pool *pool;
1549 int ret = -1;
1550 unsigned long flags;
Dan Magenheimer966b9012011-07-07 07:37:19 -07001551 size_t size = PAGE_SIZE;
Dan Magenheimer9cc06bf2011-02-06 19:26:08 -08001552
1553 local_irq_save(flags);
Dan Magenheimer966b9012011-07-07 07:37:19 -07001554 pool = zcache_get_pool_by_id(cli_id, pool_id);
Dan Magenheimer9cc06bf2011-02-06 19:26:08 -08001555 if (likely(pool != NULL)) {
1556 if (atomic_read(&pool->obj_count) > 0)
Seth Jenningsc5f5c4d2011-08-10 12:56:49 -05001557 ret = tmem_get(pool, oidp, index, (char *)(page),
Dan Magenheimer966b9012011-07-07 07:37:19 -07001558 &size, 0, is_ephemeral(pool));
Dan Magenheimer9cc06bf2011-02-06 19:26:08 -08001559 zcache_put_pool(pool);
1560 }
1561 local_irq_restore(flags);
1562 return ret;
1563}
1564
Dan Magenheimer966b9012011-07-07 07:37:19 -07001565static int zcache_flush_page(int cli_id, int pool_id,
1566 struct tmem_oid *oidp, uint32_t index)
Dan Magenheimer9cc06bf2011-02-06 19:26:08 -08001567{
1568 struct tmem_pool *pool;
1569 int ret = -1;
1570 unsigned long flags;
1571
1572 local_irq_save(flags);
1573 zcache_flush_total++;
Dan Magenheimer966b9012011-07-07 07:37:19 -07001574 pool = zcache_get_pool_by_id(cli_id, pool_id);
Dan Magenheimer9cc06bf2011-02-06 19:26:08 -08001575 if (likely(pool != NULL)) {
1576 if (atomic_read(&pool->obj_count) > 0)
1577 ret = tmem_flush_page(pool, oidp, index);
1578 zcache_put_pool(pool);
1579 }
1580 if (ret >= 0)
1581 zcache_flush_found++;
1582 local_irq_restore(flags);
1583 return ret;
1584}
1585
Dan Magenheimer966b9012011-07-07 07:37:19 -07001586static int zcache_flush_object(int cli_id, int pool_id,
1587 struct tmem_oid *oidp)
Dan Magenheimer9cc06bf2011-02-06 19:26:08 -08001588{
1589 struct tmem_pool *pool;
1590 int ret = -1;
1591 unsigned long flags;
1592
1593 local_irq_save(flags);
1594 zcache_flobj_total++;
Dan Magenheimer966b9012011-07-07 07:37:19 -07001595 pool = zcache_get_pool_by_id(cli_id, pool_id);
Dan Magenheimer9cc06bf2011-02-06 19:26:08 -08001596 if (likely(pool != NULL)) {
1597 if (atomic_read(&pool->obj_count) > 0)
1598 ret = tmem_flush_object(pool, oidp);
1599 zcache_put_pool(pool);
1600 }
1601 if (ret >= 0)
1602 zcache_flobj_found++;
1603 local_irq_restore(flags);
1604 return ret;
1605}
1606
static int zcache_destroy_pool(int cli_id, int pool_id)
{
	struct tmem_pool *pool = NULL;
	struct zcache_client *cli = NULL;
	int ret = -1;

	if (pool_id < 0)
		goto out;
	if (cli_id == LOCAL_CLIENT)
		cli = &zcache_host;
	else if ((unsigned int)cli_id < MAX_CLIENTS)
		cli = &zcache_clients[cli_id];
	if (cli == NULL)
		goto out;
	atomic_inc(&cli->refcount);
	pool = cli->tmem_pools[pool_id];
	if (pool == NULL) {
		/* drop the client reference taken above before bailing out */
		atomic_dec(&cli->refcount);
		goto out;
	}
	cli->tmem_pools[pool_id] = NULL;
	/* wait for pool activity on other cpus to quiesce */
	while (atomic_read(&pool->refcount) != 0)
		;
	atomic_dec(&cli->refcount);
	local_bh_disable();
	ret = tmem_destroy_pool(pool);
	local_bh_enable();
	kfree(pool);
	pr_info("zcache: destroyed pool id=%d, cli_id=%d\n",
			pool_id, cli_id);
out:
	return ret;
}

static int zcache_new_pool(uint16_t cli_id, uint32_t flags)
{
	int poolid = -1;
	struct tmem_pool *pool;
	struct zcache_client *cli = NULL;

	if (cli_id == LOCAL_CLIENT)
		cli = &zcache_host;
	else if ((unsigned int)cli_id < MAX_CLIENTS)
		cli = &zcache_clients[cli_id];
	if (cli == NULL)
		goto out;
	atomic_inc(&cli->refcount);
	pool = kmalloc(sizeof(struct tmem_pool), GFP_ATOMIC);
	if (pool == NULL) {
		pr_info("zcache: pool creation failed: out of memory\n");
		goto out;
	}

	for (poolid = 0; poolid < MAX_POOLS_PER_CLIENT; poolid++)
		if (cli->tmem_pools[poolid] == NULL)
			break;
	if (poolid >= MAX_POOLS_PER_CLIENT) {
		pr_info("zcache: pool creation failed: max exceeded\n");
		kfree(pool);
		poolid = -1;
		goto out;
	}
	atomic_set(&pool->refcount, 0);
	pool->client = cli;
	pool->pool_id = poolid;
	tmem_new_pool(pool, flags);
	cli->tmem_pools[poolid] = pool;
	pr_info("zcache: created %s tmem pool, id=%d, client=%d\n",
		flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral",
		poolid, cli_id);
out:
	if (cli != NULL)
		atomic_dec(&cli->refcount);
	return poolid;
}

/**********
 * Two kernel subsystems can currently be layered on top of tmem:
 * "cleancache", which is used as a second-chance cache for clean page
 * cache pages, and "frontswap", which is used for swap pages to avoid
 * writes to disk.  A generic "shim" is provided here for each, to
 * translate in-kernel semantics to zcache semantics.
 */
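/*
 * Illustrative sketch of how the layering below is wired up (no new
 * functionality): a put from the kernel reaches tmem via the registered
 * cleancache_ops.put_page hook -> zcache_cleancache_put_page() ->
 * zcache_put_page() -> tmem_put(), and similarly via
 * frontswap_ops.put_page -> zcache_frontswap_put_page() ->
 * zcache_put_page() -> tmem_put().  The cleancache shim reinterprets
 * struct cleancache_filekey as struct tmem_oid with a raw cast; this is
 * only safe because init_fs()/init_shared_fs() BUG_ON() any size
 * mismatch between the two types.
 */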

#ifdef CONFIG_CLEANCACHE
static void zcache_cleancache_put_page(int pool_id,
					struct cleancache_filekey key,
					pgoff_t index, struct page *page)
{
	u32 ind = (u32) index;
	struct tmem_oid oid = *(struct tmem_oid *)&key;

	if (likely(ind == index))
		(void)zcache_put_page(LOCAL_CLIENT, pool_id, &oid, index, page);
}

static int zcache_cleancache_get_page(int pool_id,
					struct cleancache_filekey key,
					pgoff_t index, struct page *page)
{
	u32 ind = (u32) index;
	struct tmem_oid oid = *(struct tmem_oid *)&key;
	int ret = -1;

	if (likely(ind == index))
		ret = zcache_get_page(LOCAL_CLIENT, pool_id, &oid, index, page);
	return ret;
}

static void zcache_cleancache_flush_page(int pool_id,
					struct cleancache_filekey key,
					pgoff_t index)
{
	u32 ind = (u32) index;
	struct tmem_oid oid = *(struct tmem_oid *)&key;

	if (likely(ind == index))
		(void)zcache_flush_page(LOCAL_CLIENT, pool_id, &oid, ind);
}

static void zcache_cleancache_flush_inode(int pool_id,
					struct cleancache_filekey key)
{
	struct tmem_oid oid = *(struct tmem_oid *)&key;

	(void)zcache_flush_object(LOCAL_CLIENT, pool_id, &oid);
}

static void zcache_cleancache_flush_fs(int pool_id)
{
	if (pool_id >= 0)
		(void)zcache_destroy_pool(LOCAL_CLIENT, pool_id);
}

static int zcache_cleancache_init_fs(size_t pagesize)
{
	BUG_ON(sizeof(struct cleancache_filekey) !=
				sizeof(struct tmem_oid));
	BUG_ON(pagesize != PAGE_SIZE);
	return zcache_new_pool(LOCAL_CLIENT, 0);
}

static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize)
{
	/* shared pools are unsupported and map to private */
	BUG_ON(sizeof(struct cleancache_filekey) !=
				sizeof(struct tmem_oid));
	BUG_ON(pagesize != PAGE_SIZE);
	return zcache_new_pool(LOCAL_CLIENT, 0);
}

static struct cleancache_ops zcache_cleancache_ops = {
	.put_page = zcache_cleancache_put_page,
	.get_page = zcache_cleancache_get_page,
	.flush_page = zcache_cleancache_flush_page,
	.flush_inode = zcache_cleancache_flush_inode,
	.flush_fs = zcache_cleancache_flush_fs,
	.init_shared_fs = zcache_cleancache_init_shared_fs,
	.init_fs = zcache_cleancache_init_fs
};

struct cleancache_ops zcache_cleancache_register_ops(void)
{
	struct cleancache_ops old_ops =
		cleancache_register_ops(&zcache_cleancache_ops);

	return old_ops;
}
#endif

#ifdef CONFIG_FRONTSWAP
/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
static int zcache_frontswap_poolid = -1;

/*
 * Swizzling increases objects per swaptype, increasing tmem concurrency
 * for heavy swaploads.  Later, a larger nr_cpus may warrant a larger
 * SWIZ_BITS.  Setting SWIZ_BITS to 27 would essentially reconstruct the
 * swap entry from frontswap_get_page(), but that has side-effects, so 8
 * is used instead.
 */
#define SWIZ_BITS 8
#define SWIZ_MASK ((1 << SWIZ_BITS) - 1)
#define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK))
#define iswiz(_ind) (_ind >> SWIZ_BITS)

static inline struct tmem_oid oswiz(unsigned type, u32 ind)
{
	struct tmem_oid oid = { .oid = { 0 } };
	oid.oid[0] = _oswiz(type, ind);
	return oid;
}
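
/*
 * Worked example (illustration only, assuming SWIZ_BITS == 8): a swap
 * page of type 2 at offset 0x12345 yields
 *   oid.oid[0] = _oswiz(2, 0x12345) = (2 << 8) | 0x45 = 0x245
 *   index      = iswiz(0x12345)     = 0x12345 >> 8    = 0x123
 * so consecutive swap offsets are spread across 2^SWIZ_BITS = 256 tmem
 * objects per swaptype, and the remaining high bits select the page
 * index within the chosen object.
 */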

static int zcache_frontswap_put_page(unsigned type, pgoff_t offset,
				   struct page *page)
{
	u64 ind64 = (u64)offset;
	u32 ind = (u32)offset;
	struct tmem_oid oid = oswiz(type, ind);
	int ret = -1;
	unsigned long flags;

	BUG_ON(!PageLocked(page));
	if (likely(ind64 == ind)) {
		local_irq_save(flags);
		ret = zcache_put_page(LOCAL_CLIENT, zcache_frontswap_poolid,
					&oid, iswiz(ind), page);
		local_irq_restore(flags);
	}
	return ret;
}

/*
 * Returns 0 if the page was successfully retrieved from frontswap, or
 * -1 if it was not present (which should never happen).
 */
static int zcache_frontswap_get_page(unsigned type, pgoff_t offset,
				   struct page *page)
{
	u64 ind64 = (u64)offset;
	u32 ind = (u32)offset;
	struct tmem_oid oid = oswiz(type, ind);
	int ret = -1;

	BUG_ON(!PageLocked(page));
	if (likely(ind64 == ind))
		ret = zcache_get_page(LOCAL_CLIENT, zcache_frontswap_poolid,
					&oid, iswiz(ind), page);
	return ret;
}

/* flush a single page from frontswap */
static void zcache_frontswap_flush_page(unsigned type, pgoff_t offset)
{
	u64 ind64 = (u64)offset;
	u32 ind = (u32)offset;
	struct tmem_oid oid = oswiz(type, ind);

	if (likely(ind64 == ind))
		(void)zcache_flush_page(LOCAL_CLIENT, zcache_frontswap_poolid,
					&oid, iswiz(ind));
}

/* flush all pages from the passed swaptype */
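/*
 * oswiz() spreads each swaptype across SWIZ_MASK + 1 tmem objects, so
 * flushing a whole swaptype means flushing each of those objects in
 * turn, which is what the loop below does.
 */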
static void zcache_frontswap_flush_area(unsigned type)
{
	struct tmem_oid oid;
	int ind;

	for (ind = SWIZ_MASK; ind >= 0; ind--) {
		oid = oswiz(type, ind);
		(void)zcache_flush_object(LOCAL_CLIENT,
						zcache_frontswap_poolid, &oid);
	}
}

static void zcache_frontswap_init(unsigned ignored)
{
	/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
	if (zcache_frontswap_poolid < 0)
		zcache_frontswap_poolid =
			zcache_new_pool(LOCAL_CLIENT, TMEM_POOL_PERSIST);
}

static struct frontswap_ops zcache_frontswap_ops = {
	.put_page = zcache_frontswap_put_page,
	.get_page = zcache_frontswap_get_page,
	.flush_page = zcache_frontswap_flush_page,
	.flush_area = zcache_frontswap_flush_area,
	.init = zcache_frontswap_init
};

struct frontswap_ops zcache_frontswap_register_ops(void)
{
	struct frontswap_ops old_ops =
		frontswap_register_ops(&zcache_frontswap_ops);

	return old_ops;
}
#endif

/*
 * zcache initialization
 * NOTE: for now, zcache must be enabled with the "zcache" kernel boot
 * parameter, otherwise nothing happens.
 */

static int zcache_enabled;

static int __init enable_zcache(char *s)
{
	zcache_enabled = 1;
	return 1;
}
__setup("zcache", enable_zcache);

/* allow independent boot-time disabling of cleancache and frontswap */

static int use_cleancache = 1;

static int __init no_cleancache(char *s)
{
	use_cleancache = 0;
	return 1;
}

__setup("nocleancache", no_cleancache);

static int use_frontswap = 1;

static int __init no_frontswap(char *s)
{
	use_frontswap = 0;
	return 1;
}

__setup("nofrontswap", no_frontswap);
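
/*
 * Example (illustrative only, not an exhaustive command line): booting
 * with
 *
 *	zcache nofrontswap
 *
 * on the kernel command line enables zcache but leaves the frontswap
 * shim unregistered, while the cleancache shim (if configured) is still
 * used; omitting "zcache" entirely leaves zcache inactive.
 */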

static int __init zcache_init(void)
{
	int ret = 0;

#ifdef CONFIG_SYSFS
	ret = sysfs_create_group(mm_kobj, &zcache_attr_group);
	if (ret) {
		pr_err("zcache: can't create sysfs\n");
		goto out;
	}
#endif /* CONFIG_SYSFS */
#if defined(CONFIG_CLEANCACHE) || defined(CONFIG_FRONTSWAP)
	if (zcache_enabled) {
		unsigned int cpu;

		tmem_register_hostops(&zcache_hostops);
		tmem_register_pamops(&zcache_pamops);
		ret = register_cpu_notifier(&zcache_cpu_notifier_block);
		if (ret) {
			pr_err("zcache: can't register cpu notifier\n");
			goto out;
		}
		for_each_online_cpu(cpu) {
			void *pcpu = (void *)(long)cpu;
			zcache_cpu_notifier(&zcache_cpu_notifier_block,
				CPU_UP_PREPARE, pcpu);
		}
	}
	zcache_objnode_cache = kmem_cache_create("zcache_objnode",
				sizeof(struct tmem_objnode), 0, 0, NULL);
	zcache_obj_cache = kmem_cache_create("zcache_obj",
				sizeof(struct tmem_obj), 0, 0, NULL);
	ret = zcache_new_client(LOCAL_CLIENT);
	if (ret) {
		pr_err("zcache: can't create client\n");
		goto out;
	}
#endif
#ifdef CONFIG_CLEANCACHE
	if (zcache_enabled && use_cleancache) {
		struct cleancache_ops old_ops;

		zbud_init();
		register_shrinker(&zcache_shrinker);
		old_ops = zcache_cleancache_register_ops();
		pr_info("zcache: cleancache enabled using kernel "
			"transcendent memory and compression buddies\n");
		if (old_ops.init_fs != NULL)
			pr_warning("zcache: cleancache_ops overridden\n");
	}
#endif
#ifdef CONFIG_FRONTSWAP
	if (zcache_enabled && use_frontswap) {
		struct frontswap_ops old_ops;

		old_ops = zcache_frontswap_register_ops();
		pr_info("zcache: frontswap enabled using kernel "
			"transcendent memory and xvmalloc\n");
		if (old_ops.init != NULL)
			pr_warning("zcache: frontswap_ops overridden\n");
	}
#endif
out:
	return ret;
}

module_init(zcache_init)