1/*
2 * linux/mm/zcache.c
3 *
4 * A cleancache backend for file pages compression.
5 * Concepts based on original zcache by Dan Magenheimer.
6 * Copyright (C) 2013 Bob Liu <bob.liu@xxxxxxxxxx>
7 *
 8 * With zcache, active file pages can be compressed in memory during page
 9 * reclaim. When their data is needed again, the read I/O is avoided. This
 10 * results in a significant performance gain under memory pressure for
 11 * systems with many file pages.
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version 2
16 * of the License, or (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22*/
23
24#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
25
26#include <linux/atomic.h>
27#include <linux/cleancache.h>
28#include <linux/cpu.h>
29#include <linux/crypto.h>
30#include <linux/page-flags.h>
31#include <linux/pagemap.h>
32#include <linux/highmem.h>
33#include <linux/mm_types.h>
34#include <linux/module.h>
35#include <linux/slab.h>
36#include <linux/spinlock.h>
37#include <linux/radix-tree.h>
38#include <linux/rbtree.h>
39#include <linux/types.h>
40#include <linux/zbud.h>
41
42/*
43 * Enable/disable zcache (disabled by default)
44 */
45static bool zcache_enabled __read_mostly;
46module_param_named(enabled, zcache_enabled, bool, 0);
47
48/*
49 * Compressor to be used by zcache
50 */
51#define ZCACHE_COMPRESSOR_DEFAULT "lzo"
52static char *zcache_compressor = ZCACHE_COMPRESSOR_DEFAULT;
53module_param_named(compressor, zcache_compressor, charp, 0);
54
55/*
56 * The maximum percentage of memory that the compressed pool can occupy.
57 */
58static unsigned int zcache_max_pool_percent = 10;
59module_param_named(max_pool_percent, zcache_max_pool_percent, uint, 0644);
60
61static unsigned int zcache_clear_percent = 4;
62module_param_named(clear_percent, zcache_clear_percent, uint, 0644);
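/*
 * Illustrative usage of the parameters above (an example, not the only
 * supported configuration): with zcache built in, they are typically set
 * on the kernel command line, e.g.
 *
 *	zcache.enabled=1 zcache.compressor=lz4 zcache.max_pool_percent=20
 *
 * lz4 is assumed to be available through the crypto API; otherwise zcache
 * falls back to the default compressor ("lzo").
 */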
63/*
64 * zcache statistics
65 */
66static u64 zcache_pool_limit_hit;
67static u64 zcache_dup_entry;
68static u64 zcache_zbud_alloc_fail;
69static u64 zcache_evict_zpages;
70static u64 zcache_evict_filepages;
71static u64 zcache_inactive_pages_refused;
72static u64 zcache_reclaim_fail;
73static u64 zcache_pool_shrink;
74static u64 zcache_pool_shrink_fail;
75static u64 zcache_pool_shrink_pages;
76static u64 zcache_store_failed;
77static atomic_t zcache_stored_pages = ATOMIC_INIT(0);
78static atomic_t zcache_stored_zero_pages = ATOMIC_INIT(0);
79
80#define GFP_ZCACHE \
81 (__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | \
82 __GFP_NOMEMALLOC | __GFP_NO_KSWAPD | __GFP_ZERO)
83
84/*
 85 * Make sure this is different from a radix-tree
 86 * indirect pointer or an exceptional entry.
87 */
88#define ZERO_HANDLE ((void *)~(~0UL >> 1))
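/*
 * For reference: ~(~0UL >> 1) is the value with only the top bit set
 * (0x80000000UL on 32-bit, 0x8000000000000000UL on 64-bit builds), so it
 * is non-NULL and its low bits are clear, unlike the low-bit tags the
 * radix tree uses for indirect pointers and exceptional entries.
 */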
89
90/*
91 * Zcache receives pages for compression through the Cleancache API and is able
92 * to evict pages from its own compressed pool on an LRU basis in the case that
93 * the compressed pool is full.
94 *
 95 * Zcache makes use of zbud for managing the compressed memory pool. Each
 96 * allocation in zbud is not directly accessible by address. Rather, a handle
 97 * (zaddr) is returned by the allocation routine, and that handle (zaddr) must
 98 * be mapped before being accessed. The compressed memory pool grows on demand and
99 * shrinks as compressed pages are freed.
100 *
 101 * When a file page is passed from cleancache to zcache, zcache maintains a
 102 * mapping of the <pool_id, inode_number, page_index> tuple to the zbud address
 103 * that references that compressed file page. This mapping is achieved with a
 104 * red-black tree per (cleancache-enabled) filesystem mount, plus a radix tree
 105 * per red-black node.
106 *
 107 * A zcache pool, with pool_id as the index, is created when a filesystem is
 108 * mounted. Each zcache pool has a red-black tree in which the inode number
 109 * (rb_index) is the search key. Each red-black tree node has a radix tree
 110 * which uses page->index (ra_index) as the index. Each radix tree slot points
 111 * to the zbud address combined with some extra information (zcache_ra_handle).
112 */
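/*
 * A minimal sketch of the lookup path described above (illustrative only;
 * it ignores locking, reference counting and the ZERO_HANDLE case, and the
 * names inode_nr/page_index are placeholders for the rb_index and ra_index
 * keys; the real, locked version is zcache_load_delete_zaddr() below):
 *
 *	zpool   = zcache.pools[pool_id];
 *	rbnode  = zcache_find_rbnode(&zpool->rbtree, inode_nr, NULL, NULL);
 *	zaddr   = radix_tree_lookup(&rbnode->ratree, page_index);
 *	zhandle = zbud_map(zpool->pool, (unsigned long)zaddr);
 *	zpage   = (u8 *)(zhandle + 1);    compressed data follows the handle
 *	zbud_unmap(zpool->pool, (unsigned long)zaddr);
 *
 * A zero-filled page is stored as the ZERO_HANDLE sentinel instead of a
 * zbud allocation.
 */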
113#define MAX_ZCACHE_POOLS 32
114/*
115 * One zcache_pool per (cleancache aware) filesystem mount instance
116 */
117struct zcache_pool {
118 struct rb_root rbtree;
119 rwlock_t rb_lock; /* Protects rbtree */
120 u64 size;
121 struct zbud_pool *pool; /* Zbud pool used */
122};
123
124/*
125 * Manage all zcache pools
126 */
127struct _zcache {
128 struct zcache_pool *pools[MAX_ZCACHE_POOLS];
129 u32 num_pools; /* Current no. of zcache pools */
130 spinlock_t pool_lock; /* Protects pools[] and num_pools */
131};
132struct _zcache zcache;
133
134/*
 135 * Red-black tree node; each node has a page-index radix tree.
 136 * Indexed by inode number.
137 */
138struct zcache_rbnode {
139 struct rb_node rb_node;
140 int rb_index;
141 struct radix_tree_root ratree; /* Page radix tree per inode rbtree */
142 spinlock_t ra_lock; /* Protects radix tree */
143 struct kref refcount;
144};
145
146/*
147 * Radix-tree leaf, indexed by page->index
148 */
149struct zcache_ra_handle {
150 int rb_index; /* Redblack tree index */
151 int ra_index; /* Radix tree index */
152 int zlen; /* Compressed page size */
153 struct zcache_pool *zpool; /* Finding zcache_pool during evict */
154};
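/*
 * Layout sketch of one zbud allocation as used by the store path below
 * (illustrative, not a separate definition): zbud_alloc() is asked for
 * zlen + sizeof(struct zcache_ra_handle) bytes, the handle sits at the
 * start of the mapped buffer and the compressed data follows it:
 *
 *	+---------------------------+------------------------------+
 *	| struct zcache_ra_handle   | zlen bytes of compressed     |
 *	| (rb_index, ra_index,      | page data, reached via       |
 *	|  zlen, zpool)             | (u8 *)(zhandle + 1)          |
 *	+---------------------------+------------------------------+
 */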
155
156u64 zcache_pages(void)
157{
158 int i;
159 u64 count = 0;
160
161 for (i = 0; (i < MAX_ZCACHE_POOLS) && zcache.pools[i]; i++)
162 count += zcache.pools[i]->size;
163
164 return count;
165}
166
167static struct kmem_cache *zcache_rbnode_cache;
168static int zcache_rbnode_cache_create(void)
169{
170 zcache_rbnode_cache = KMEM_CACHE(zcache_rbnode, 0);
171 return zcache_rbnode_cache == NULL;
172}
173static void zcache_rbnode_cache_destroy(void)
174{
175 kmem_cache_destroy(zcache_rbnode_cache);
176}
177
178static int zcache_shrink(struct shrinker *s, struct shrink_control *sc)
179{
180 unsigned long active_file;
181 unsigned long file;
182 long file_gap;
183 unsigned long freed = 0;
184 unsigned long pool;
185 static bool running;
186 int i = 0;
187 int retries;
188
189 if (running)
190 goto end;
191
192 running = true;
193 active_file = global_page_state(NR_ACTIVE_FILE);
194 file = global_page_state(NR_FILE_PAGES);
195 pool = zcache_pages();
196
197 file_gap = pool - file;
198
199 if ((file_gap >= 0) &&
200 (totalram_pages * zcache_clear_percent / 100 > file)) {
201 file_gap = pool;
202 zcache_pool_shrink++;
203 goto reclaim;
204 }
205
206 /*
207 * Shrink the pool down towards the number of active
208 * file pages: if zcache holds no more pages than there
209 * are active file pages, there is nothing to reclaim.
210 */
211 file_gap = pool - active_file;
212 if (file_gap < 0)
213 file_gap = 0;
214 else
215 zcache_pool_shrink++;
216
217reclaim:
218 retries = file_gap;
219 while ((file_gap > 0) && retries) {
220 struct zcache_pool *zpool =
221 zcache.pools[i++ % MAX_ZCACHE_POOLS];
222 if (!zpool || !zpool->size)
223 continue;
224 if (zbud_reclaim_page(zpool->pool, 8)) {
225 zcache_pool_shrink_fail++;
226 retries--;
227 continue;
228 }
229 freed++;
230 file_gap--;
231 }
232
233 zcache_pool_shrink_pages += freed;
234 for (i = 0; (i < MAX_ZCACHE_POOLS) && zcache.pools[i]; i++)
235 zcache.pools[i]->size =
236 zbud_get_pool_size(zcache.pools[i]->pool);
237
238 running = false;
239end:
240 return freed;
241}
242
243static struct shrinker zcache_shrinker = {
244 .shrink = zcache_shrink,
245 .seeks = DEFAULT_SEEKS * 16
246};
247
248/*
249 * Compression functions
 250 * (The functions below are copied from zswap.)
251 */
252static struct crypto_comp * __percpu *zcache_comp_pcpu_tfms;
253
254enum comp_op {
255 ZCACHE_COMPOP_COMPRESS,
256 ZCACHE_COMPOP_DECOMPRESS
257};
258
259static int zcache_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
260 u8 *dst, unsigned int *dlen)
261{
262 struct crypto_comp *tfm;
263 int ret;
264
265 tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, get_cpu());
266 switch (op) {
267 case ZCACHE_COMPOP_COMPRESS:
268 ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
269 break;
270 case ZCACHE_COMPOP_DECOMPRESS:
271 ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
272 break;
273 default:
274 ret = -EINVAL;
275 }
276
277 put_cpu();
278 return ret;
279}
280
281static int __init zcache_comp_init(void)
282{
283 if (!crypto_has_comp(zcache_compressor, 0, 0)) {
284 pr_info("%s compressor not available\n", zcache_compressor);
285 /* fall back to default compressor */
286 zcache_compressor = ZCACHE_COMPRESSOR_DEFAULT;
287 if (!crypto_has_comp(zcache_compressor, 0, 0))
288 /* can't even load the default compressor */
289 return -ENODEV;
290 }
291 pr_info("using %s compressor\n", zcache_compressor);
292
293 /* alloc percpu transforms */
294 zcache_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
295 if (!zcache_comp_pcpu_tfms)
296 return -ENOMEM;
297 return 0;
298}
299
300static void zcache_comp_exit(void)
301{
302 /* free percpu transforms */
303 if (zcache_comp_pcpu_tfms)
304 free_percpu(zcache_comp_pcpu_tfms);
305}
306
307/*
308 * Per-cpu code
 309 * (The functions below are also copied from zswap.)
310 */
311static DEFINE_PER_CPU(u8 *, zcache_dstmem);
312
313static int __zcache_cpu_notifier(unsigned long action, unsigned long cpu)
314{
315 struct crypto_comp *tfm;
316 u8 *dst;
317
318 switch (action) {
319 case CPU_UP_PREPARE:
320 tfm = crypto_alloc_comp(zcache_compressor, 0, 0);
321 if (IS_ERR(tfm)) {
322 pr_err("can't allocate compressor transform\n");
323 return NOTIFY_BAD;
324 }
325 *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = tfm;
326 dst = kmalloc(PAGE_SIZE * 2, GFP_KERNEL);
327 if (!dst) {
328 pr_err("can't allocate compressor buffer\n");
329 crypto_free_comp(tfm);
330 *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL;
331 return NOTIFY_BAD;
332 }
333 per_cpu(zcache_dstmem, cpu) = dst;
334 break;
335 case CPU_DEAD:
336 case CPU_UP_CANCELED:
337 tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu);
338 if (tfm) {
339 crypto_free_comp(tfm);
340 *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL;
341 }
342 dst = per_cpu(zcache_dstmem, cpu);
343 kfree(dst);
344 per_cpu(zcache_dstmem, cpu) = NULL;
345 break;
346 default:
347 break;
348 }
349 return NOTIFY_OK;
350}
351
352static int zcache_cpu_notifier(struct notifier_block *nb,
353 unsigned long action, void *pcpu)
354{
355 unsigned long cpu = (unsigned long)pcpu;
356
357 return __zcache_cpu_notifier(action, cpu);
358}
359
360static struct notifier_block zcache_cpu_notifier_block = {
361 .notifier_call = zcache_cpu_notifier
362};
363
364static int zcache_cpu_init(void)
365{
366 unsigned long cpu;
367
368 get_online_cpus();
369 for_each_online_cpu(cpu)
370 if (__zcache_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
371 goto cleanup;
372 register_cpu_notifier(&zcache_cpu_notifier_block);
373 put_online_cpus();
374 return 0;
375
376cleanup:
377 for_each_online_cpu(cpu)
378 __zcache_cpu_notifier(CPU_UP_CANCELED, cpu);
379 put_online_cpus();
380 return -ENOMEM;
381}
382
383/*
384 * Zcache helpers
385 */
386static bool zcache_is_full(void)
387{
388 long file = global_page_state(NR_FILE_PAGES);
389
390 return ((totalram_pages * zcache_max_pool_percent / 100 <
391 zcache_pages()) ||
392 (totalram_pages * zcache_clear_percent / 100 >
393 file));
394}
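/*
 * Worked example with the default tunables, assuming 4 KiB pages: on a
 * system with totalram_pages == 1 << 20 (4 GiB), max_pool_percent=10 marks
 * the cache full once the compressed pool exceeds ~104,857 page frames
 * (~410 MiB), and clear_percent=4 additionally reports "full" whenever
 * fewer than ~41,943 file pages remain, so zcache backs off when the page
 * cache itself is under pressure.
 */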
395
396/*
 397 * The caller must hold at least zpool->rb_lock (a read lock is sufficient)
398 */
399static struct zcache_rbnode *zcache_find_rbnode(struct rb_root *rbtree,
400 int index, struct rb_node **rb_parent, struct rb_node ***rb_link)
401{
402 struct zcache_rbnode *entry;
403 struct rb_node **__rb_link, *__rb_parent, *rb_prev;
404
405 __rb_link = &rbtree->rb_node;
406 rb_prev = __rb_parent = NULL;
407
408 while (*__rb_link) {
409 __rb_parent = *__rb_link;
410 entry = rb_entry(__rb_parent, struct zcache_rbnode, rb_node);
411 if (entry->rb_index > index)
412 __rb_link = &__rb_parent->rb_left;
413 else if (entry->rb_index < index) {
414 rb_prev = __rb_parent;
415 __rb_link = &__rb_parent->rb_right;
416 } else
417 return entry;
418 }
419
420 if (rb_parent)
421 *rb_parent = __rb_parent;
422 if (rb_link)
423 *rb_link = __rb_link;
424 return NULL;
425}
426
427static struct zcache_rbnode *zcache_find_get_rbnode(struct zcache_pool *zpool,
428 int rb_index)
429{
430 unsigned long flags;
431 struct zcache_rbnode *rbnode;
432
433 read_lock_irqsave(&zpool->rb_lock, flags);
434 rbnode = zcache_find_rbnode(&zpool->rbtree, rb_index, 0, 0);
435 if (rbnode)
436 kref_get(&rbnode->refcount);
437 read_unlock_irqrestore(&zpool->rb_lock, flags);
438 return rbnode;
439}
440
441/*
442 * kref_put callback for zcache_rbnode.
443 *
444 * The rbnode must have been isolated from rbtree already.
445 */
446static void zcache_rbnode_release(struct kref *kref)
447{
448 struct zcache_rbnode *rbnode;
449
450 rbnode = container_of(kref, struct zcache_rbnode, refcount);
451 BUG_ON(rbnode->ratree.rnode);
452 kmem_cache_free(zcache_rbnode_cache, rbnode);
453}
454
455/*
456 * Check whether the radix-tree of this rbnode is empty.
457 * If that's true, then we can delete this zcache_rbnode from
458 * zcache_pool->rbtree
459 *
460 * Caller must hold zcache_rbnode->ra_lock
461 */
462static int zcache_rbnode_empty(struct zcache_rbnode *rbnode)
463{
464 return rbnode->ratree.rnode == NULL;
465}
466
467/*
468 * Remove zcache_rbnode from zpool->rbtree
469 *
 470 * holded_rblock - whether the caller already holds zpool->rb_lock
471 */
472static void zcache_rbnode_isolate(struct zcache_pool *zpool,
473 struct zcache_rbnode *rbnode, bool holded_rblock)
474{
475 unsigned long flags;
476
477 if (!holded_rblock)
478 write_lock_irqsave(&zpool->rb_lock, flags);
479 /*
480 * Someone may have taken a reference on this rbnode before we
481 * could acquire the write lock above.
482 * We only want to remove it from zpool->rbtree when the caller and
483 * the corresponding ratree hold the only references to this rbnode.
484 * The check below ensures that a racing zcache put will not end up
485 * adding a page to an isolated node and thereby losing that memory.
486 */
487 if (atomic_read(&rbnode->refcount.refcount) == 2) {
488 rb_erase(&rbnode->rb_node, &zpool->rbtree);
489 RB_CLEAR_NODE(&rbnode->rb_node);
490 kref_put(&rbnode->refcount, zcache_rbnode_release);
491 }
492 if (!holded_rblock)
493 write_unlock_irqrestore(&zpool->rb_lock, flags);
494}
495
496/*
 497 * Store a zaddr allocated by zbud_alloc() into the rbtree-ratree hierarchy.
498 */
499static int zcache_store_zaddr(struct zcache_pool *zpool,
500 int ra_index, int rb_index, unsigned long zaddr)
501{
502 unsigned long flags;
503 struct zcache_rbnode *rbnode, *tmp;
504 struct rb_node **link = NULL, *parent = NULL;
505 int ret;
506 void *dup_zaddr;
507
508 rbnode = zcache_find_get_rbnode(zpool, rb_index);
509 if (!rbnode) {
510 /* alloc and init a new rbnode */
511 rbnode = kmem_cache_alloc(zcache_rbnode_cache,
512 GFP_ZCACHE);
513 if (!rbnode)
514 return -ENOMEM;
515
516 INIT_RADIX_TREE(&rbnode->ratree, GFP_ATOMIC|__GFP_NOWARN);
517 spin_lock_init(&rbnode->ra_lock);
518 rbnode->rb_index = rb_index;
519 kref_init(&rbnode->refcount);
520 RB_CLEAR_NODE(&rbnode->rb_node);
521
522 /* add that rbnode to rbtree */
523 write_lock_irqsave(&zpool->rb_lock, flags);
524 tmp = zcache_find_rbnode(&zpool->rbtree, rb_index,
525 &parent, &link);
526 if (tmp) {
527 /* somebody else allocated new rbnode */
528 kmem_cache_free(zcache_rbnode_cache, rbnode);
529 rbnode = tmp;
530 } else {
531 rb_link_node(&rbnode->rb_node, parent, link);
532 rb_insert_color(&rbnode->rb_node, &zpool->rbtree);
533 }
534
535 /* Inc the reference of this zcache_rbnode */
536 kref_get(&rbnode->refcount);
537 write_unlock_irqrestore(&zpool->rb_lock, flags);
538 }
539
540 /* Successfully got a zcache_rbnode when arriving here */
541 spin_lock_irqsave(&rbnode->ra_lock, flags);
542 dup_zaddr = radix_tree_delete(&rbnode->ratree, ra_index);
543 if (unlikely(dup_zaddr)) {
544 if (dup_zaddr == ZERO_HANDLE) {
545 atomic_dec(&zcache_stored_zero_pages);
546 } else {
547 zbud_free(zpool->pool, (unsigned long)dup_zaddr);
548 atomic_dec(&zcache_stored_pages);
549 zpool->size = zbud_get_pool_size(zpool->pool);
550 }
551 zcache_dup_entry++;
552 }
553
554 /* Insert zcache_ra_handle to ratree */
555 ret = radix_tree_insert(&rbnode->ratree, ra_index,
556 (void *)zaddr);
557 spin_unlock_irqrestore(&rbnode->ra_lock, flags);
558 if (unlikely(ret)) {
559 write_lock_irqsave(&zpool->rb_lock, flags);
560 spin_lock(&rbnode->ra_lock);
561
562 if (zcache_rbnode_empty(rbnode))
563 zcache_rbnode_isolate(zpool, rbnode, 1);
564
565 spin_unlock(&rbnode->ra_lock);
566 write_unlock_irqrestore(&zpool->rb_lock, flags);
567 }
568
569 kref_put(&rbnode->refcount, zcache_rbnode_release);
570 return ret;
571}
572
573/*
574 * Load zaddr and delete it from radix tree.
575 * If the radix tree of the corresponding rbnode is empty, delete the rbnode
576 * from zpool->rbtree also.
577 */
578static void *zcache_load_delete_zaddr(struct zcache_pool *zpool,
579 int rb_index, int ra_index)
580{
581 struct zcache_rbnode *rbnode;
582 void *zaddr = NULL;
583 unsigned long flags;
584
585 rbnode = zcache_find_get_rbnode(zpool, rb_index);
586 if (!rbnode)
587 goto out;
588
589 BUG_ON(rbnode->rb_index != rb_index);
590
591 spin_lock_irqsave(&rbnode->ra_lock, flags);
592 zaddr = radix_tree_delete(&rbnode->ratree, ra_index);
593 spin_unlock_irqrestore(&rbnode->ra_lock, flags);
594
595 /* rb_lock and ra_lock must be taken again in the given sequence */
596 write_lock_irqsave(&zpool->rb_lock, flags);
597 spin_lock(&rbnode->ra_lock);
598 if (zcache_rbnode_empty(rbnode))
599 zcache_rbnode_isolate(zpool, rbnode, 1);
600 spin_unlock(&rbnode->ra_lock);
601 write_unlock_irqrestore(&zpool->rb_lock, flags);
602
603 kref_put(&rbnode->refcount, zcache_rbnode_release);
604out:
605 return zaddr;
606}
607
608static bool zero_page(struct page *page)
609{
610 unsigned long *ptr = kmap_atomic(page);
611 int i;
612 bool ret = false;
613
614 for (i = 0; i < PAGE_SIZE / sizeof(*ptr); i++) {
615 if (ptr[i])
616 goto out;
617 }
618 ret = true;
619out:
620 kunmap_atomic(ptr);
621 return ret;
622}
623
624static void zcache_store_page(int pool_id, struct cleancache_filekey key,
625 pgoff_t index, struct page *page)
626{
627 struct zcache_ra_handle *zhandle;
628 u8 *zpage, *src, *dst;
629 /* Address of zhandle + compressed data(zpage) */
630 unsigned long zaddr = 0;
631 unsigned int zlen = PAGE_SIZE;
632 bool zero = false;
633 int ret;
634
635 struct zcache_pool *zpool = zcache.pools[pool_id];
636
637 /*
638 * Zcache would be ineffective if the compressed memory pool were full
639 * of compressed inactive file pages, most of which would never be
640 * used again.
641 * So we refuse to compress pages that are not on the active file list.
642 */
643 if (!PageWasActive(page)) {
644 zcache_inactive_pages_refused++;
645 return;
646 }
647
648 zero = zero_page(page);
649 if (zero)
650 goto zero;
651
652 if (zcache_is_full()) {
653 zcache_pool_limit_hit++;
654 if (zbud_reclaim_page(zpool->pool, 8)) {
655 zcache_reclaim_fail++;
656 return;
657 }
658 /*
659 * Continue if a page frame was reclaimed successfully.
660 */
661 zcache_evict_filepages++;
662 zpool->size = zbud_get_pool_size(zpool->pool);
663 }
664
665 /* compress */
666 dst = get_cpu_var(zcache_dstmem);
667 src = kmap_atomic(page);
668 ret = zcache_comp_op(ZCACHE_COMPOP_COMPRESS, src, PAGE_SIZE, dst,
669 &zlen);
670 kunmap_atomic(src);
671 if (ret) {
672 pr_err("zcache compress error ret %d\n", ret);
673 put_cpu_var(zcache_dstmem);
674 return;
675 }
676
677 /* store zcache handle together with compressed page data */
678 ret = zbud_alloc(zpool->pool, zlen + sizeof(struct zcache_ra_handle),
679 GFP_ZCACHE, &zaddr);
680 if (ret) {
681 zcache_zbud_alloc_fail++;
682 put_cpu_var(zcache_dstmem);
683 return;
684 }
685
686 zhandle = (struct zcache_ra_handle *)zbud_map(zpool->pool, zaddr);
687
688 /* Compressed page data stored at the end of zcache_ra_handle */
689 zpage = (u8 *)(zhandle + 1);
690 memcpy(zpage, dst, zlen);
691 zbud_unmap(zpool->pool, zaddr);
692 put_cpu_var(zcache_dstmem);
693
694zero:
695 if (zero)
696 zaddr = (unsigned long)ZERO_HANDLE;
697
698 /* store zcache handle */
699 ret = zcache_store_zaddr(zpool, index, key.u.ino, zaddr);
700 if (ret) {
701 zcache_store_failed++;
702 if (!zero)
703 zbud_free(zpool->pool, zaddr);
704 return;
705 }
706
707 /* update stats */
708 if (zero) {
709 atomic_inc(&zcache_stored_zero_pages);
710 } else {
711 zhandle->ra_index = index;
712 zhandle->rb_index = key.u.ino;
713 zhandle->zlen = zlen;
714 zhandle->zpool = zpool;
715 atomic_inc(&zcache_stored_pages);
716 zpool->size = zbud_get_pool_size(zpool->pool);
717 }
718
719 return;
720}
721
722static int zcache_load_page(int pool_id, struct cleancache_filekey key,
723 pgoff_t index, struct page *page)
724{
725 int ret = 0;
726 u8 *src, *dst;
727 void *zaddr;
728 unsigned int dlen = PAGE_SIZE;
729 struct zcache_ra_handle *zhandle;
730 struct zcache_pool *zpool = zcache.pools[pool_id];
731
732 zaddr = zcache_load_delete_zaddr(zpool, key.u.ino, index);
733 if (!zaddr)
734 return -ENOENT;
735 else if (zaddr == ZERO_HANDLE)
736 goto map;
737
738 zhandle = (struct zcache_ra_handle *)zbud_map(zpool->pool,
739 (unsigned long)zaddr);
740 /* Compressed page data stored at the end of zcache_ra_handle */
741 src = (u8 *)(zhandle + 1);
742
743 /* decompress */
744map:
745 dst = kmap_atomic(page);
746 if (zaddr != ZERO_HANDLE) {
747 ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, src,
748 zhandle->zlen, dst, &dlen);
749 } else {
750 memset(dst, 0, PAGE_SIZE);
751 kunmap_atomic(dst);
752 flush_dcache_page(page);
753 atomic_dec(&zcache_stored_zero_pages);
754 goto out;
755 }
756 kunmap_atomic(dst);
757 zbud_unmap(zpool->pool, (unsigned long)zaddr);
758 zbud_free(zpool->pool, (unsigned long)zaddr);
759
760 BUG_ON(ret);
761 BUG_ON(dlen != PAGE_SIZE);
762
763 /* update stats */
764 atomic_dec(&zcache_stored_pages);
765 zpool->size = zbud_get_pool_size(zpool->pool);
766out:
767 SetPageWasActive(page);
768 return ret;
769}
770
771static void zcache_flush_page(int pool_id, struct cleancache_filekey key,
772 pgoff_t index)
773{
774 struct zcache_pool *zpool = zcache.pools[pool_id];
775 void *zaddr = NULL;
776
777 zaddr = zcache_load_delete_zaddr(zpool, key.u.ino, index);
778 if (zaddr && (zaddr != ZERO_HANDLE)) {
779 zbud_free(zpool->pool, (unsigned long)zaddr);
780 atomic_dec(&zcache_stored_pages);
781 zpool->size = zbud_get_pool_size(zpool->pool);
782 } else if (zaddr == ZERO_HANDLE) {
783 atomic_dec(&zcache_stored_zero_pages);
784 }
785}
786
787#define FREE_BATCH 16
788/*
 789 * The caller must hold rbnode->ra_lock
790 */
791static void zcache_flush_ratree(struct zcache_pool *zpool,
792 struct zcache_rbnode *rbnode)
793{
794 unsigned long index = 0;
795 int count, i;
796 struct zcache_ra_handle *zhandle;
797 void *zaddr = NULL;
798
799 do {
800 void *zaddrs[FREE_BATCH];
801 unsigned long indices[FREE_BATCH];
802
803 count = radix_tree_gang_lookup_index(&rbnode->ratree,
804 (void **)zaddrs, indices,
805 index, FREE_BATCH);
806
807 for (i = 0; i < count; i++) {
808 if (zaddrs[i] == ZERO_HANDLE) {
809 zaddr = radix_tree_delete(&rbnode->ratree,
810 indices[i]);
811 if (zaddr)
812 atomic_dec(&zcache_stored_zero_pages);
813 continue;
814 }
815 zhandle = (struct zcache_ra_handle *)zbud_map(
816 zpool->pool, (unsigned long)zaddrs[i]);
817 index = zhandle->ra_index;
818 zaddr = radix_tree_delete(&rbnode->ratree, index);
819 if (!zaddr)
820 continue;
821 zbud_unmap(zpool->pool, (unsigned long)zaddrs[i]);
822 zbud_free(zpool->pool, (unsigned long)zaddrs[i]);
823 atomic_dec(&zcache_stored_pages);
824 zpool->size = zbud_get_pool_size(zpool->pool);
825 }
826
827 index++;
828 } while (count == FREE_BATCH);
829}
830
831static void zcache_flush_inode(int pool_id, struct cleancache_filekey key)
832{
833 struct zcache_rbnode *rbnode;
834 unsigned long flags1, flags2;
835 struct zcache_pool *zpool = zcache.pools[pool_id];
836
837 /*
838 * Prevent new pages from being added to the same rbnode, so take
839 * rb_lock first.
840 */
841 write_lock_irqsave(&zpool->rb_lock, flags1);
842 rbnode = zcache_find_rbnode(&zpool->rbtree, key.u.ino, 0, 0);
843 if (!rbnode) {
844 write_unlock_irqrestore(&zpool->rb_lock, flags1);
845 return;
846 }
847
848 kref_get(&rbnode->refcount);
849 spin_lock_irqsave(&rbnode->ra_lock, flags2);
850
851 zcache_flush_ratree(zpool, rbnode);
852 if (zcache_rbnode_empty(rbnode))
853 /* We already hold rb_lock when arriving here */
854 zcache_rbnode_isolate(zpool, rbnode, 1);
855
856 spin_unlock_irqrestore(&rbnode->ra_lock, flags2);
857 write_unlock_irqrestore(&zpool->rb_lock, flags1);
858 kref_put(&rbnode->refcount, zcache_rbnode_release);
859}
860
861static void zcache_destroy_pool(struct zcache_pool *zpool);
862static void zcache_flush_fs(int pool_id)
863{
864 struct zcache_rbnode *z_rbnode = NULL;
865 struct rb_node *rbnode;
866 unsigned long flags1, flags2;
867 struct zcache_pool *zpool;
868
869 if (pool_id < 0)
870 return;
871
872 zpool = zcache.pools[pool_id];
873 if (!zpool)
874 return;
875
876 /*
877 * Prevent new pages from being added, so take rb_lock first.
878 */
879 write_lock_irqsave(&zpool->rb_lock, flags1);
880
881 rbnode = rb_first(&zpool->rbtree);
882 while (rbnode) {
883 z_rbnode = rb_entry(rbnode, struct zcache_rbnode, rb_node);
884 rbnode = rb_next(rbnode);
885 if (z_rbnode) {
886 kref_get(&z_rbnode->refcount);
887 spin_lock_irqsave(&z_rbnode->ra_lock, flags2);
888 zcache_flush_ratree(zpool, z_rbnode);
889 if (zcache_rbnode_empty(z_rbnode))
890 zcache_rbnode_isolate(zpool, z_rbnode, 1);
891 spin_unlock_irqrestore(&z_rbnode->ra_lock, flags2);
892 kref_put(&z_rbnode->refcount, zcache_rbnode_release);
893 }
894 }
895
896 write_unlock_irqrestore(&zpool->rb_lock, flags1);
897 zcache_destroy_pool(zpool);
898}
899
900/*
901 * Evict compressed pages from zcache pool on an LRU basis after the compressed
902 * pool is full.
903 */
904static int zcache_evict_zpage(struct zbud_pool *pool, unsigned long zaddr)
905{
906 struct zcache_pool *zpool;
907 struct zcache_ra_handle *zhandle;
908 void *zaddr_intree;
909
910 BUG_ON(zaddr == (unsigned long)ZERO_HANDLE);
911
912 zhandle = (struct zcache_ra_handle *)zbud_map(pool, zaddr);
913
914 zpool = zhandle->zpool;
915 /* There can be a race with zcache store */
916 if (!zpool)
917 return -EINVAL;
918
919 BUG_ON(pool != zpool->pool);
920
921 zaddr_intree = zcache_load_delete_zaddr(zpool, zhandle->rb_index,
922 zhandle->ra_index);
923 if (zaddr_intree) {
924 BUG_ON((unsigned long)zaddr_intree != zaddr);
925 zbud_unmap(pool, zaddr);
926 zbud_free(pool, zaddr);
927 atomic_dec(&zcache_stored_pages);
928 zpool->size = zbud_get_pool_size(pool);
929 zcache_evict_zpages++;
930 }
931 return 0;
932}
933
934static struct zbud_ops zcache_zbud_ops = {
935 .evict = zcache_evict_zpage
936};
937
938/* Return pool id */
939static int zcache_create_pool(void)
940{
941 int ret;
942 struct zcache_pool *zpool;
943
944 zpool = kzalloc(sizeof(*zpool), GFP_KERNEL);
945 if (!zpool) {
946 ret = -ENOMEM;
947 goto out;
948 }
949
950 zpool->pool = zbud_create_pool(GFP_KERNEL, &zcache_zbud_ops);
951 if (!zpool->pool) {
952 kfree(zpool);
953 ret = -ENOMEM;
954 goto out;
955 }
956
957 spin_lock(&zcache.pool_lock);
958 if (zcache.num_pools == MAX_ZCACHE_POOLS) {
959 pr_err("Cannot create new pool (limit:%u)\n", MAX_ZCACHE_POOLS);
960 zbud_destroy_pool(zpool->pool);
961 kfree(zpool);
962 ret = -EPERM;
963 goto out_unlock;
964 }
965
966 rwlock_init(&zpool->rb_lock);
967 zpool->rbtree = RB_ROOT;
968 /* Add to pool list */
969 for (ret = 0; ret < MAX_ZCACHE_POOLS; ret++)
970 if (!zcache.pools[ret])
971 break;
972 zcache.pools[ret] = zpool;
973 zcache.num_pools++;
974 pr_info("New pool created id:%d\n", ret);
975
976out_unlock:
977 spin_unlock(&zcache.pool_lock);
978out:
979 return ret;
980}
981
982static void zcache_destroy_pool(struct zcache_pool *zpool)
983{
984 int i;
985
986 if (!zpool)
987 return;
988
989 spin_lock(&zcache.pool_lock);
990 zcache.num_pools--;
991 for (i = 0; i < MAX_ZCACHE_POOLS; i++)
992 if (zcache.pools[i] == zpool)
993 break;
994 zcache.pools[i] = NULL;
995 spin_unlock(&zcache.pool_lock);
996
997 if (!RB_EMPTY_ROOT(&zpool->rbtree))
998 WARN(1, "Memory leak detected. Freeing non-empty pool!\n");
999
1000 zbud_destroy_pool(zpool->pool);
1001 kfree(zpool);
1002}
1003
1004static int zcache_init_fs(size_t pagesize)
1005{
1006 int ret;
1007
1008 if (pagesize != PAGE_SIZE) {
1009 pr_info("Unsupported page size: %zu\n", pagesize);
1010 ret = -EINVAL;
1011 goto out;
1012 }
1013
1014 ret = zcache_create_pool();
1015 if (ret < 0) {
1016 pr_info("Failed to create new pool\n");
1017 ret = -ENOMEM;
1018 goto out;
1019 }
1020out:
1021 return ret;
1022}
1023
1024static int zcache_init_shared_fs(char *uuid, size_t pagesize)
1025{
1026 /* shared pools are unsupported and map to private */
1027 return zcache_init_fs(pagesize);
1028}
1029
1030static struct cleancache_ops zcache_ops = {
1031 .put_page = zcache_store_page,
1032 .get_page = zcache_load_page,
1033 .invalidate_page = zcache_flush_page,
1034 .invalidate_inode = zcache_flush_inode,
1035 .invalidate_fs = zcache_flush_fs,
1036 .init_shared_fs = zcache_init_shared_fs,
1037 .init_fs = zcache_init_fs
1038};
1039
1040/*
1041 * Debugfs functions
1042 */
1043#ifdef CONFIG_DEBUG_FS
1044#include <linux/debugfs.h>
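/*
 * Example of reading the statistics created below (assuming debugfs is
 * mounted at /sys/kernel/debug and zcache is enabled):
 *
 *	# cat /sys/kernel/debug/zcache/pool_pages
 *	# cat /sys/kernel/debug/zcache/stored_pages
 */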
1045
1046static int pool_pages_get(void *_data, u64 *val)
1047{
1048 *val = zcache_pages();
1049 return 0;
1050}
1051
1052DEFINE_SIMPLE_ATTRIBUTE(pool_page_fops, pool_pages_get, NULL, "%llu\n");
1053
1054static struct dentry *zcache_debugfs_root;
1055
1056static int __init zcache_debugfs_init(void)
1057{
1058 if (!debugfs_initialized())
1059 return -ENODEV;
1060
1061 zcache_debugfs_root = debugfs_create_dir("zcache", NULL);
1062 if (!zcache_debugfs_root)
1063 return -ENOMEM;
1064
1065 debugfs_create_u64("pool_limit_hit", S_IRUGO, zcache_debugfs_root,
1066 &zcache_pool_limit_hit);
1067 debugfs_create_u64("reject_alloc_fail", S_IRUGO, zcache_debugfs_root,
1068 &zcache_zbud_alloc_fail);
1069 debugfs_create_u64("duplicate_entry", S_IRUGO, zcache_debugfs_root,
1070 &zcache_dup_entry);
1071 debugfs_create_file("pool_pages", S_IRUGO, zcache_debugfs_root, NULL,
1072 &pool_page_fops);
1073 debugfs_create_atomic_t("stored_pages", S_IRUGO, zcache_debugfs_root,
1074 &zcache_stored_pages);
1075 debugfs_create_atomic_t("stored_zero_pages", S_IRUGO,
1076 zcache_debugfs_root, &zcache_stored_zero_pages);
1077 debugfs_create_u64("evicted_zpages", S_IRUGO, zcache_debugfs_root,
1078 &zcache_evict_zpages);
1079 debugfs_create_u64("evicted_filepages", S_IRUGO, zcache_debugfs_root,
1080 &zcache_evict_filepages);
1081 debugfs_create_u64("reclaim_fail", S_IRUGO, zcache_debugfs_root,
1082 &zcache_reclaim_fail);
1083 debugfs_create_u64("inactive_pages_refused", S_IRUGO,
1084 zcache_debugfs_root, &zcache_inactive_pages_refused);
1085 debugfs_create_u64("pool_shrink_count", S_IRUGO,
1086 zcache_debugfs_root, &zcache_pool_shrink);
1087 debugfs_create_u64("pool_shrink_fail", S_IRUGO,
1088 zcache_debugfs_root, &zcache_pool_shrink_fail);
1089 debugfs_create_u64("pool_shrink_pages", S_IRUGO,
1090 zcache_debugfs_root, &zcache_pool_shrink_pages);
1091 debugfs_create_u64("store_fail", S_IRUGO,
1092 zcache_debugfs_root, &zcache_store_failed);
1093 return 0;
1094}
1095
1096static void __exit zcache_debugfs_exit(void)
1097{
1098 debugfs_remove_recursive(zcache_debugfs_root);
1099}
1100#else
1101static int __init zcache_debugfs_init(void)
1102{
1103 return 0;
1104}
1105static void __exit zcache_debugfs_exit(void)
1106{
1107}
1108#endif
1109
1110/*
1111 * zcache init and exit
1112 */
1113static int __init init_zcache(void)
1114{
1115 if (!zcache_enabled)
1116 return 0;
1117
1118 pr_info("loading zcache..\n");
1119 if (zcache_rbnode_cache_create()) {
1120 pr_err("entry cache creation failed\n");
1121 goto error;
1122 }
1123
1124 if (zcache_comp_init()) {
1125 pr_err("compressor initialization failed\n");
1126 goto compfail;
1127 }
1128 if (zcache_cpu_init()) {
1129 pr_err("per-cpu initialization failed\n");
1130 goto pcpufail;
1131 }
1132
1133 spin_lock_init(&zcache.pool_lock);
1134 cleancache_register_ops(&zcache_ops);
1135
1136 if (zcache_debugfs_init())
1137 pr_warn("debugfs initialization failed\n");
1138 register_shrinker(&zcache_shrinker);
1139 return 0;
1140pcpufail:
1141 zcache_comp_exit();
1142compfail:
1143 zcache_rbnode_cache_destroy();
1144error:
1145 return -ENOMEM;
1146}
1147
1148/* must be late so crypto has time to come up */
1149late_initcall(init_zcache);
1150
1151MODULE_LICENSE("GPL");
1152MODULE_AUTHOR("Bob Liu <bob.liu@xxxxxxxxxx>");
1153MODULE_DESCRIPTION("Compressed cache for clean file pages");
1154