Blame - mm/zswap.c - kernel/msm-4.9

blob: 5a63f78a5601aa63112f30b3338be6c413f02f4c [file] [log] [blame]

Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	1	/*
				2	* zswap.c - zswap driver file
				3	*
				4	* zswap is a backend for frontswap that takes pages that are in the process
				5	* of being swapped out and attempts to compress and store them in a
				6	* RAM-based memory pool. This can result in a significant I/O reduction on
				7	* the swap device and, in the case where decompressing from RAM is faster
				8	* than reading from the swap device, can also improve workload performance.
				9	*
				10	* Copyright (C) 2012 Seth Jennings <sjenning@linux.vnet.ibm.com>
				11	*
				12	* This program is free software; you can redistribute it and/or
				13	* modify it under the terms of the GNU General Public License
				14	* as published by the Free Software Foundation; either version 2
				15	* of the License, or (at your option) any later version.
				16	*
				17	* This program is distributed in the hope that it will be useful,
				18	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				19	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				20	* GNU General Public License for more details.
				21	*/
				22
				23	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
				24
				25	#include <linux/module.h>
				26	#include <linux/cpu.h>
				27	#include <linux/highmem.h>
				28	#include <linux/slab.h>
				29	#include <linux/spinlock.h>
				30	#include <linux/types.h>
				31	#include <linux/atomic.h>
				32	#include <linux/frontswap.h>
				33	#include <linux/rbtree.h>
				34	#include <linux/swap.h>
				35	#include <linux/crypto.h>
				36	#include <linux/mempool.h>
				37	#include <linux/zbud.h>
				38
				39	#include <linux/mm_types.h>
				40	#include <linux/page-flags.h>
				41	#include <linux/swapops.h>
				42	#include <linux/writeback.h>
				43	#include <linux/pagemap.h>
				44
				45	/*********************************
				46	* statistics
				47	**********************************/
				48	/* Number of memory pages used by the compressed pool */
				49	static u64 zswap_pool_pages;
				50	/* The number of compressed pages currently stored in zswap */
				51	static atomic_t zswap_stored_pages = ATOMIC_INIT(0);
				52
				53	/*
				54	* The statistics below are not protected from concurrent access for
				55	* performance reasons so they may not be a 100% accurate. However,
				56	* they do provide useful information on roughly how many times a
				57	* certain event is occurring.
				58	*/
				59
				60	/* Pool limit was hit (see zswap_max_pool_percent) */
				61	static u64 zswap_pool_limit_hit;
				62	/* Pages written back when pool limit was reached */
				63	static u64 zswap_written_back_pages;
				64	/* Store failed due to a reclaim failure after pool limit was reached */
				65	static u64 zswap_reject_reclaim_fail;
				66	/* Compressed page was too big for the allocator to (optimally) store */
				67	static u64 zswap_reject_compress_poor;
				68	/* Store failed because underlying allocator could not get memory */
				69	static u64 zswap_reject_alloc_fail;
				70	/* Store failed because the entry metadata could not be allocated (rare) */
				71	static u64 zswap_reject_kmemcache_fail;
				72	/* Duplicate store was encountered (rare) */
				73	static u64 zswap_duplicate_entry;
				74
				75	/*********************************
				76	* tunables
				77	**********************************/
				78	/* Enable/disable zswap (disabled by default, fixed at boot for now) */
				79	static bool zswap_enabled __read_mostly;
				80	module_param_named(enabled, zswap_enabled, bool, 0);
				81
				82	/* Compressor to be used by zswap (fixed at boot for now) */
				83	#define ZSWAP_COMPRESSOR_DEFAULT "lzo"
				84	static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
				85	module_param_named(compressor, zswap_compressor, charp, 0);
				86
				87	/* The maximum percentage of memory that the compressed pool can occupy */
				88	static unsigned int zswap_max_pool_percent = 20;
				89	module_param_named(max_pool_percent,
				90	zswap_max_pool_percent, uint, 0644);
				91
				92	/*********************************
				93	* compression functions
				94	**********************************/
				95	/* per-cpu compression transforms */
				96	static struct crypto_comp * __percpu *zswap_comp_pcpu_tfms;
				97
				98	enum comp_op {
				99	ZSWAP_COMPOP_COMPRESS,
				100	ZSWAP_COMPOP_DECOMPRESS
				101	};
				102
				103	static int zswap_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
				104	u8 dst, unsigned int dlen)
				105	{
				106	struct crypto_comp *tfm;
				107	int ret;
				108
				109	tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, get_cpu());
				110	switch (op) {
				111	case ZSWAP_COMPOP_COMPRESS:
				112	ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
				113	break;
				114	case ZSWAP_COMPOP_DECOMPRESS:
				115	ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
				116	break;
				117	default:
				118	ret = -EINVAL;
				119	}
				120
				121	put_cpu();
				122	return ret;
				123	}
				124
				125	static int __init zswap_comp_init(void)
				126	{
				127	if (!crypto_has_comp(zswap_compressor, 0, 0)) {
				128	pr_info("%s compressor not available\n", zswap_compressor);
				129	/* fall back to default compressor */
				130	zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
				131	if (!crypto_has_comp(zswap_compressor, 0, 0))
				132	/* can't even load the default compressor */
				133	return -ENODEV;
				134	}
				135	pr_info("using %s compressor\n", zswap_compressor);
				136
				137	/* alloc percpu transforms */
				138	zswap_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
				139	if (!zswap_comp_pcpu_tfms)
				140	return -ENOMEM;
				141	return 0;
				142	}
				143
				144	static void zswap_comp_exit(void)
				145	{
				146	/* free percpu transforms */
				147	if (zswap_comp_pcpu_tfms)
				148	free_percpu(zswap_comp_pcpu_tfms);
				149	}
				150
				151	/*********************************
				152	* data structures
				153	**********************************/
				154	/*
				155	* struct zswap_entry
				156	*
				157	* This structure contains the metadata for tracking a single compressed
				158	* page within zswap.
				159	*
				160	* rbnode - links the entry into red-black tree for the appropriate swap type
				161	* refcount - the number of outstanding reference to the entry. This is needed
				162	* to protect against premature freeing of the entry by code
				163	* concurent calls to load, invalidate, and writeback. The lock
				164	* for the zswap_tree structure that contains the entry must
				165	* be held while changing the refcount. Since the lock must
				166	* be held, there is no reason to also make refcount atomic.
				167	* offset - the swap offset for the entry. Index into the red-black tree.
				168	* handle - zsmalloc allocation handle that stores the compressed page data
				169	* length - the length in bytes of the compressed page data. Needed during
				170	* decompression
				171	*/
				172	struct zswap_entry {
				173	struct rb_node rbnode;
				174	pgoff_t offset;
				175	int refcount;
				176	unsigned int length;
				177	unsigned long handle;
				178	};
				179
				180	struct zswap_header {
				181	swp_entry_t swpentry;
				182	};
				183
				184	/*
				185	* The tree lock in the zswap_tree struct protects a few things:
				186	* - the rbtree
				187	* - the refcount field of each entry in the tree
				188	*/
				189	struct zswap_tree {
				190	struct rb_root rbroot;
				191	spinlock_t lock;
				192	struct zbud_pool *pool;
				193	};
				194
				195	static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
				196
				197	/*********************************
				198	* zswap entry functions
				199	**********************************/
				200	static struct kmem_cache *zswap_entry_cache;
				201
				202	static int zswap_entry_cache_create(void)
				203	{
				204	zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
				205	return (zswap_entry_cache == NULL);
				206	}
				207
				208	static void zswap_entry_cache_destory(void)
				209	{
				210	kmem_cache_destroy(zswap_entry_cache);
				211	}
				212
				213	static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
				214	{
				215	struct zswap_entry *entry;
				216	entry = kmem_cache_alloc(zswap_entry_cache, gfp);
				217	if (!entry)
				218	return NULL;
				219	entry->refcount = 1;
Weijie Yang	0ab0abc	2013-11-12 15:08:27 -0800	[diff] [blame]	220	RB_CLEAR_NODE(&entry->rbnode);
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	221	return entry;
				222	}
				223
				224	static void zswap_entry_cache_free(struct zswap_entry *entry)
				225	{
				226	kmem_cache_free(zswap_entry_cache, entry);
				227	}
				228
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	229	/*********************************
				230	* rbtree functions
				231	**********************************/
				232	static struct zswap_entry zswap_rb_search(struct rb_root root, pgoff_t offset)
				233	{
				234	struct rb_node *node = root->rb_node;
				235	struct zswap_entry *entry;
				236
				237	while (node) {
				238	entry = rb_entry(node, struct zswap_entry, rbnode);
				239	if (entry->offset > offset)
				240	node = node->rb_left;
				241	else if (entry->offset < offset)
				242	node = node->rb_right;
				243	else
				244	return entry;
				245	}
				246	return NULL;
				247	}
				248
				249	/*
				250	* In the case that a entry with the same offset is found, a pointer to
				251	* the existing entry is stored in dupentry and the function returns -EEXIST
				252	*/
				253	static int zswap_rb_insert(struct rb_root root, struct zswap_entry entry,
				254	struct zswap_entry **dupentry)
				255	{
				256	struct rb_node *link = &root->rb_node, parent = NULL;
				257	struct zswap_entry *myentry;
				258
				259	while (*link) {
				260	parent = *link;
				261	myentry = rb_entry(parent, struct zswap_entry, rbnode);
				262	if (myentry->offset > entry->offset)
				263	link = &(*link)->rb_left;
				264	else if (myentry->offset < entry->offset)
				265	link = &(*link)->rb_right;
				266	else {
				267	*dupentry = myentry;
				268	return -EEXIST;
				269	}
				270	}
				271	rb_link_node(&entry->rbnode, parent, link);
				272	rb_insert_color(&entry->rbnode, root);
				273	return 0;
				274	}
				275
Weijie Yang	0ab0abc	2013-11-12 15:08:27 -0800	[diff] [blame]	276	static void zswap_rb_erase(struct rb_root root, struct zswap_entry entry)
				277	{
				278	if (!RB_EMPTY_NODE(&entry->rbnode)) {
				279	rb_erase(&entry->rbnode, root);
				280	RB_CLEAR_NODE(&entry->rbnode);
				281	}
				282	}
				283
				284	/*
				285	* Carries out the common pattern of freeing and entry's zsmalloc allocation,
				286	* freeing the entry itself, and decrementing the number of stored pages.
				287	*/
				288	static void zswap_free_entry(struct zswap_tree *tree,
				289	struct zswap_entry *entry)
				290	{
				291	zbud_free(tree->pool, entry->handle);
				292	zswap_entry_cache_free(entry);
				293	atomic_dec(&zswap_stored_pages);
				294	zswap_pool_pages = zbud_get_pool_size(tree->pool);
				295	}
				296
				297	/* caller must hold the tree lock */
				298	static void zswap_entry_get(struct zswap_entry *entry)
				299	{
				300	entry->refcount++;
				301	}
				302
				303	/* caller must hold the tree lock
				304	* remove from the tree and free it, if nobody reference the entry
				305	*/
				306	static void zswap_entry_put(struct zswap_tree *tree,
				307	struct zswap_entry *entry)
				308	{
				309	int refcount = --entry->refcount;
				310
				311	BUG_ON(refcount < 0);
				312	if (refcount == 0) {
				313	zswap_rb_erase(&tree->rbroot, entry);
				314	zswap_free_entry(tree, entry);
				315	}
				316	}
				317
				318	/* caller must hold the tree lock */
				319	static struct zswap_entry zswap_entry_find_get(struct rb_root root,
				320	pgoff_t offset)
				321	{
				322	struct zswap_entry *entry = NULL;
				323
				324	entry = zswap_rb_search(root, offset);
				325	if (entry)
				326	zswap_entry_get(entry);
				327
				328	return entry;
				329	}
				330
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	331	/*********************************
				332	* per-cpu code
				333	**********************************/
				334	static DEFINE_PER_CPU(u8 *, zswap_dstmem);
				335
				336	static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
				337	{
				338	struct crypto_comp *tfm;
				339	u8 *dst;
				340
				341	switch (action) {
				342	case CPU_UP_PREPARE:
				343	tfm = crypto_alloc_comp(zswap_compressor, 0, 0);
				344	if (IS_ERR(tfm)) {
				345	pr_err("can't allocate compressor transform\n");
				346	return NOTIFY_BAD;
				347	}
				348	*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = tfm;
				349	dst = kmalloc(PAGE_SIZE * 2, GFP_KERNEL);
				350	if (!dst) {
				351	pr_err("can't allocate compressor buffer\n");
				352	crypto_free_comp(tfm);
				353	*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
				354	return NOTIFY_BAD;
				355	}
				356	per_cpu(zswap_dstmem, cpu) = dst;
				357	break;
				358	case CPU_DEAD:
				359	case CPU_UP_CANCELED:
				360	tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu);
				361	if (tfm) {
				362	crypto_free_comp(tfm);
				363	*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
				364	}
				365	dst = per_cpu(zswap_dstmem, cpu);
				366	kfree(dst);
				367	per_cpu(zswap_dstmem, cpu) = NULL;
				368	break;
				369	default:
				370	break;
				371	}
				372	return NOTIFY_OK;
				373	}
				374
				375	static int zswap_cpu_notifier(struct notifier_block *nb,
				376	unsigned long action, void *pcpu)
				377	{
				378	unsigned long cpu = (unsigned long)pcpu;
				379	return __zswap_cpu_notifier(action, cpu);
				380	}
				381
				382	static struct notifier_block zswap_cpu_notifier_block = {
				383	.notifier_call = zswap_cpu_notifier
				384	};
				385
				386	static int zswap_cpu_init(void)
				387	{
				388	unsigned long cpu;
				389
				390	get_online_cpus();
				391	for_each_online_cpu(cpu)
				392	if (__zswap_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
				393	goto cleanup;
				394	register_cpu_notifier(&zswap_cpu_notifier_block);
				395	put_online_cpus();
				396	return 0;
				397
				398	cleanup:
				399	for_each_online_cpu(cpu)
				400	__zswap_cpu_notifier(CPU_UP_CANCELED, cpu);
				401	put_online_cpus();
				402	return -ENOMEM;
				403	}
				404
				405	/*********************************
				406	* helpers
				407	**********************************/
				408	static bool zswap_is_full(void)
				409	{
				410	return (totalram_pages * zswap_max_pool_percent / 100 <
				411	zswap_pool_pages);
				412	}
				413
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	414	/*********************************
				415	* writeback code
				416	**********************************/
				417	/* return enum for zswap_get_swap_cache_page */
				418	enum zswap_get_swap_ret {
				419	ZSWAP_SWAPCACHE_NEW,
				420	ZSWAP_SWAPCACHE_EXIST,
Weijie Yang	67d13fe	2013-11-12 15:08:26 -0800	[diff] [blame]	421	ZSWAP_SWAPCACHE_FAIL,
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	422	};
				423
				424	/*
				425	* zswap_get_swap_cache_page
				426	*
				427	* This is an adaption of read_swap_cache_async()
				428	*
				429	* This function tries to find a page with the given swap entry
				430	* in the swapper_space address space (the swap cache). If the page
				431	* is found, it is returned in retpage. Otherwise, a page is allocated,
				432	* added to the swap cache, and returned in retpage.
				433	*
				434	* If success, the swap cache page is returned in retpage
Weijie Yang	67d13fe	2013-11-12 15:08:26 -0800	[diff] [blame]	435	* Returns ZSWAP_SWAPCACHE_EXIST if page was already in the swap cache
				436	* Returns ZSWAP_SWAPCACHE_NEW if the new page needs to be populated,
				437	* the new page is added to swapcache and locked
				438	* Returns ZSWAP_SWAPCACHE_FAIL on error
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	439	*/
				440	static int zswap_get_swap_cache_page(swp_entry_t entry,
				441	struct page **retpage)
				442	{
				443	struct page found_page, new_page = NULL;
Sunghan Suh	822518d	2013-09-11 14:20:22 -0700	[diff] [blame]	444	struct address_space *swapper_space = swap_address_space(entry);
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	445	int err;
				446
				447	*retpage = NULL;
				448	do {
				449	/*
				450	* First check the swap cache. Since this is normally
				451	* called after lookup_swap_cache() failed, re-calling
				452	* that would confuse statistics.
				453	*/
				454	found_page = find_get_page(swapper_space, entry.val);
				455	if (found_page)
				456	break;
				457
				458	/*
				459	* Get a new page to read into from swap.
				460	*/
				461	if (!new_page) {
				462	new_page = alloc_page(GFP_KERNEL);
				463	if (!new_page)
				464	break; /* Out of memory */
				465	}
				466
				467	/*
				468	* call radix_tree_preload() while we can wait.
				469	*/
				470	err = radix_tree_preload(GFP_KERNEL);
				471	if (err)
				472	break;
				473
				474	/*
				475	* Swap entry may have been freed since our caller observed it.
				476	*/
				477	err = swapcache_prepare(entry);
				478	if (err == -EEXIST) { /* seems racy */
				479	radix_tree_preload_end();
				480	continue;
				481	}
				482	if (err) { /* swp entry is obsolete ? */
				483	radix_tree_preload_end();
				484	break;
				485	}
				486
				487	/* May fail (-ENOMEM) if radix-tree node allocation failed. */
				488	__set_page_locked(new_page);
				489	SetPageSwapBacked(new_page);
				490	err = __add_to_swap_cache(new_page, entry);
				491	if (likely(!err)) {
				492	radix_tree_preload_end();
				493	lru_cache_add_anon(new_page);
				494	*retpage = new_page;
				495	return ZSWAP_SWAPCACHE_NEW;
				496	}
				497	radix_tree_preload_end();
				498	ClearPageSwapBacked(new_page);
				499	__clear_page_locked(new_page);
				500	/*
				501	* add_to_swap_cache() doesn't return -EEXIST, so we can safely
				502	* clear SWAP_HAS_CACHE flag.
				503	*/
				504	swapcache_free(entry, NULL);
				505	} while (err != -ENOMEM);
				506
				507	if (new_page)
				508	page_cache_release(new_page);
				509	if (!found_page)
Weijie Yang	67d13fe	2013-11-12 15:08:26 -0800	[diff] [blame]	510	return ZSWAP_SWAPCACHE_FAIL;
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	511	*retpage = found_page;
				512	return ZSWAP_SWAPCACHE_EXIST;
				513	}
				514
				515	/*
				516	* Attempts to free an entry by adding a page to the swap cache,
				517	* decompressing the entry data into the page, and issuing a
				518	* bio write to write the page back to the swap device.
				519	*
				520	* This can be thought of as a "resumed writeback" of the page
				521	* to the swap device. We are basically resuming the same swap
				522	* writeback path that was intercepted with the frontswap_store()
				523	* in the first place. After the page has been decompressed into
				524	* the swap cache, the compressed version stored by zswap can be
				525	* freed.
				526	*/
				527	static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
				528	{
				529	struct zswap_header *zhdr;
				530	swp_entry_t swpentry;
				531	struct zswap_tree *tree;
				532	pgoff_t offset;
				533	struct zswap_entry *entry;
				534	struct page *page;
				535	u8 src, dst;
				536	unsigned int dlen;
Weijie Yang	0ab0abc	2013-11-12 15:08:27 -0800	[diff] [blame]	537	int ret;
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	538	struct writeback_control wbc = {
				539	.sync_mode = WB_SYNC_NONE,
				540	};
				541
				542	/* extract swpentry from data */
				543	zhdr = zbud_map(pool, handle);
				544	swpentry = zhdr->swpentry; /* here */
				545	zbud_unmap(pool, handle);
				546	tree = zswap_trees[swp_type(swpentry)];
				547	offset = swp_offset(swpentry);
				548	BUG_ON(pool != tree->pool);
				549
				550	/* find and ref zswap entry */
				551	spin_lock(&tree->lock);
Weijie Yang	0ab0abc	2013-11-12 15:08:27 -0800	[diff] [blame]	552	entry = zswap_entry_find_get(&tree->rbroot, offset);
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	553	if (!entry) {
				554	/* entry was invalidated */
				555	spin_unlock(&tree->lock);
				556	return 0;
				557	}
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	558	spin_unlock(&tree->lock);
				559	BUG_ON(offset != entry->offset);
				560
				561	/* try to allocate swap cache page */
				562	switch (zswap_get_swap_cache_page(swpentry, &page)) {
Weijie Yang	67d13fe	2013-11-12 15:08:26 -0800	[diff] [blame]	563	case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	564	ret = -ENOMEM;
				565	goto fail;
				566
Weijie Yang	67d13fe	2013-11-12 15:08:26 -0800	[diff] [blame]	567	case ZSWAP_SWAPCACHE_EXIST:
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	568	/* page is already in the swap cache, ignore for now */
				569	page_cache_release(page);
				570	ret = -EEXIST;
				571	goto fail;
				572
				573	case ZSWAP_SWAPCACHE_NEW: /* page is locked */
				574	/* decompress */
				575	dlen = PAGE_SIZE;
				576	src = (u8 *)zbud_map(tree->pool, entry->handle) +
				577	sizeof(struct zswap_header);
				578	dst = kmap_atomic(page);
				579	ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
				580	entry->length, dst, &dlen);
				581	kunmap_atomic(dst);
				582	zbud_unmap(tree->pool, entry->handle);
				583	BUG_ON(ret);
				584	BUG_ON(dlen != PAGE_SIZE);
				585
				586	/* page is up to date */
				587	SetPageUptodate(page);
				588	}
				589
Weijie Yang	b349acc	2013-11-12 15:07:52 -0800	[diff] [blame]	590	/* move it to the tail of the inactive list after end_writeback */
				591	SetPageReclaim(page);
				592
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	593	/* start writeback */
				594	__swap_writepage(page, &wbc, end_swap_bio_write);
				595	page_cache_release(page);
				596	zswap_written_back_pages++;
				597
				598	spin_lock(&tree->lock);
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	599	/* drop local reference */
Weijie Yang	0ab0abc	2013-11-12 15:08:27 -0800	[diff] [blame]	600	zswap_entry_put(tree, entry);
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	601
				602	/*
Weijie Yang	0ab0abc	2013-11-12 15:08:27 -0800	[diff] [blame]	603	* There are two possible situations for entry here:
				604	* (1) refcount is 1(normal case), entry is valid and on the tree
				605	* (2) refcount is 0, entry is freed and not on the tree
				606	* because invalidate happened during writeback
				607	* search the tree and free the entry if find entry
				608	*/
				609	if (entry == zswap_rb_search(&tree->rbroot, offset))
				610	zswap_entry_put(tree, entry);
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	611	spin_unlock(&tree->lock);
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	612
Weijie Yang	0ab0abc	2013-11-12 15:08:27 -0800	[diff] [blame]	613	goto end;
				614
				615	/*
				616	* if we get here due to ZSWAP_SWAPCACHE_EXIST
				617	* a load may happening concurrently
				618	* it is safe and okay to not free the entry
				619	* if we free the entry in the following put
				620	* it it either okay to return !0
				621	*/
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	622	fail:
				623	spin_lock(&tree->lock);
Weijie Yang	0ab0abc	2013-11-12 15:08:27 -0800	[diff] [blame]	624	zswap_entry_put(tree, entry);
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	625	spin_unlock(&tree->lock);
Weijie Yang	0ab0abc	2013-11-12 15:08:27 -0800	[diff] [blame]	626
				627	end:
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	628	return ret;
				629	}
				630
				631	/*********************************
				632	* frontswap hooks
				633	**********************************/
				634	/* attempts to compress and store an single page */
				635	static int zswap_frontswap_store(unsigned type, pgoff_t offset,
				636	struct page *page)
				637	{
				638	struct zswap_tree *tree = zswap_trees[type];
				639	struct zswap_entry entry, dupentry;
				640	int ret;
				641	unsigned int dlen = PAGE_SIZE, len;
				642	unsigned long handle;
				643	char *buf;
				644	u8 src, dst;
				645	struct zswap_header *zhdr;
				646
				647	if (!tree) {
				648	ret = -ENODEV;
				649	goto reject;
				650	}
				651
				652	/* reclaim space if needed */
				653	if (zswap_is_full()) {
				654	zswap_pool_limit_hit++;
				655	if (zbud_reclaim_page(tree->pool, 8)) {
				656	zswap_reject_reclaim_fail++;
				657	ret = -ENOMEM;
				658	goto reject;
				659	}
				660	}
				661
				662	/* allocate entry */
				663	entry = zswap_entry_cache_alloc(GFP_KERNEL);
				664	if (!entry) {
				665	zswap_reject_kmemcache_fail++;
				666	ret = -ENOMEM;
				667	goto reject;
				668	}
				669
				670	/* compress */
				671	dst = get_cpu_var(zswap_dstmem);
				672	src = kmap_atomic(page);
				673	ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen);
				674	kunmap_atomic(src);
				675	if (ret) {
				676	ret = -EINVAL;
				677	goto freepage;
				678	}
				679
				680	/* store */
				681	len = dlen + sizeof(struct zswap_header);
				682	ret = zbud_alloc(tree->pool, len, __GFP_NORETRY \| __GFP_NOWARN,
				683	&handle);
				684	if (ret == -ENOSPC) {
				685	zswap_reject_compress_poor++;
				686	goto freepage;
				687	}
				688	if (ret) {
				689	zswap_reject_alloc_fail++;
				690	goto freepage;
				691	}
				692	zhdr = zbud_map(tree->pool, handle);
				693	zhdr->swpentry = swp_entry(type, offset);
				694	buf = (u8 *)(zhdr + 1);
				695	memcpy(buf, dst, dlen);
				696	zbud_unmap(tree->pool, handle);
				697	put_cpu_var(zswap_dstmem);
				698
				699	/* populate entry */
				700	entry->offset = offset;
				701	entry->handle = handle;
				702	entry->length = dlen;
				703
				704	/* map */
				705	spin_lock(&tree->lock);
				706	do {
				707	ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
				708	if (ret == -EEXIST) {
				709	zswap_duplicate_entry++;
				710	/* remove from rbtree */
Weijie Yang	0ab0abc	2013-11-12 15:08:27 -0800	[diff] [blame]	711	zswap_rb_erase(&tree->rbroot, dupentry);
				712	zswap_entry_put(tree, dupentry);
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	713	}
				714	} while (ret == -EEXIST);
				715	spin_unlock(&tree->lock);
				716
				717	/* update stats */
				718	atomic_inc(&zswap_stored_pages);
				719	zswap_pool_pages = zbud_get_pool_size(tree->pool);
				720
				721	return 0;
				722
				723	freepage:
				724	put_cpu_var(zswap_dstmem);
				725	zswap_entry_cache_free(entry);
				726	reject:
				727	return ret;
				728	}
				729
				730	/*
				731	* returns 0 if the page was successfully decompressed
				732	* return -1 on entry not found or error
				733	*/
				734	static int zswap_frontswap_load(unsigned type, pgoff_t offset,
				735	struct page *page)
				736	{
				737	struct zswap_tree *tree = zswap_trees[type];
				738	struct zswap_entry *entry;
				739	u8 src, dst;
				740	unsigned int dlen;
Weijie Yang	0ab0abc	2013-11-12 15:08:27 -0800	[diff] [blame]	741	int ret;
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	742
				743	/* find */
				744	spin_lock(&tree->lock);
Weijie Yang	0ab0abc	2013-11-12 15:08:27 -0800	[diff] [blame]	745	entry = zswap_entry_find_get(&tree->rbroot, offset);
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	746	if (!entry) {
				747	/* entry was written back */
				748	spin_unlock(&tree->lock);
				749	return -1;
				750	}
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	751	spin_unlock(&tree->lock);
				752
				753	/* decompress */
				754	dlen = PAGE_SIZE;
				755	src = (u8 *)zbud_map(tree->pool, entry->handle) +
				756	sizeof(struct zswap_header);
				757	dst = kmap_atomic(page);
				758	ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
				759	dst, &dlen);
				760	kunmap_atomic(dst);
				761	zbud_unmap(tree->pool, entry->handle);
				762	BUG_ON(ret);
				763
				764	spin_lock(&tree->lock);
Weijie Yang	0ab0abc	2013-11-12 15:08:27 -0800	[diff] [blame]	765	zswap_entry_put(tree, entry);
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	766	spin_unlock(&tree->lock);
				767
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	768	return 0;
				769	}
				770
				771	/* frees an entry in zswap */
				772	static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
				773	{
				774	struct zswap_tree *tree = zswap_trees[type];
				775	struct zswap_entry *entry;
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	776
				777	/* find */
				778	spin_lock(&tree->lock);
				779	entry = zswap_rb_search(&tree->rbroot, offset);
				780	if (!entry) {
				781	/* entry was written back */
				782	spin_unlock(&tree->lock);
				783	return;
				784	}
				785
				786	/* remove from rbtree */
Weijie Yang	0ab0abc	2013-11-12 15:08:27 -0800	[diff] [blame]	787	zswap_rb_erase(&tree->rbroot, entry);
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	788
				789	/* drop the initial reference from entry creation */
Weijie Yang	0ab0abc	2013-11-12 15:08:27 -0800	[diff] [blame]	790	zswap_entry_put(tree, entry);
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	791
				792	spin_unlock(&tree->lock);
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	793	}
				794
				795	/* frees all zswap entries for the given swap type */
				796	static void zswap_frontswap_invalidate_area(unsigned type)
				797	{
				798	struct zswap_tree *tree = zswap_trees[type];
Cody P Schafer	0bd4213	2013-09-11 14:25:33 -0700	[diff] [blame]	799	struct zswap_entry entry, n;
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	800
				801	if (!tree)
				802	return;
				803
				804	/* walk the tree and free everything */
				805	spin_lock(&tree->lock);
Weijie Yang	0ab0abc	2013-11-12 15:08:27 -0800	[diff] [blame]	806	rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
				807	zswap_free_entry(tree, entry);
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	808	tree->rbroot = RB_ROOT;
				809	spin_unlock(&tree->lock);
Weijie Yang	aa9bca0	2013-10-16 13:46:54 -0700	[diff] [blame]	810
				811	zbud_destroy_pool(tree->pool);
				812	kfree(tree);
				813	zswap_trees[type] = NULL;
Seth Jennings	2b28111	2013-07-10 16:05:03 -0700	[diff] [blame]	814	}
				815
				816	static struct zbud_ops zswap_zbud_ops = {
				817	.evict = zswap_writeback_entry
				818	};
				819
				820	static void zswap_frontswap_init(unsigned type)
				821	{
				822	struct zswap_tree *tree;
				823
				824	tree = kzalloc(sizeof(struct zswap_tree), GFP_KERNEL);
				825	if (!tree)
				826	goto err;
				827	tree->pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);
				828	if (!tree->pool)
				829	goto freetree;
				830	tree->rbroot = RB_ROOT;
				831	spin_lock_init(&tree->lock);
				832	zswap_trees[type] = tree;
				833	return;
				834
				835	freetree:
				836	kfree(tree);
				837	err:
				838	pr_err("alloc failed, zswap disabled for swap type %d\n", type);
				839	}
				840
				841	static struct frontswap_ops zswap_frontswap_ops = {
				842	.store = zswap_frontswap_store,
				843	.load = zswap_frontswap_load,
				844	.invalidate_page = zswap_frontswap_invalidate_page,
				845	.invalidate_area = zswap_frontswap_invalidate_area,
				846	.init = zswap_frontswap_init
				847	};
				848
				849	/*********************************
				850	* debugfs functions
				851	**********************************/
				852	#ifdef CONFIG_DEBUG_FS
				853	#include <linux/debugfs.h>
				854
				855	static struct dentry *zswap_debugfs_root;
				856
				857	static int __init zswap_debugfs_init(void)
				858	{
				859	if (!debugfs_initialized())
				860	return -ENODEV;
				861
				862	zswap_debugfs_root = debugfs_create_dir("zswap", NULL);
				863	if (!zswap_debugfs_root)
				864	return -ENOMEM;
				865
				866	debugfs_create_u64("pool_limit_hit", S_IRUGO,
				867	zswap_debugfs_root, &zswap_pool_limit_hit);
				868	debugfs_create_u64("reject_reclaim_fail", S_IRUGO,
				869	zswap_debugfs_root, &zswap_reject_reclaim_fail);
				870	debugfs_create_u64("reject_alloc_fail", S_IRUGO,
				871	zswap_debugfs_root, &zswap_reject_alloc_fail);
				872	debugfs_create_u64("reject_kmemcache_fail", S_IRUGO,
				873	zswap_debugfs_root, &zswap_reject_kmemcache_fail);
				874	debugfs_create_u64("reject_compress_poor", S_IRUGO,
				875	zswap_debugfs_root, &zswap_reject_compress_poor);
				876	debugfs_create_u64("written_back_pages", S_IRUGO,
				877	zswap_debugfs_root, &zswap_written_back_pages);
				878	debugfs_create_u64("duplicate_entry", S_IRUGO,
				879	zswap_debugfs_root, &zswap_duplicate_entry);
				880	debugfs_create_u64("pool_pages", S_IRUGO,
				881	zswap_debugfs_root, &zswap_pool_pages);
				882	debugfs_create_atomic_t("stored_pages", S_IRUGO,
				883	zswap_debugfs_root, &zswap_stored_pages);
				884
				885	return 0;
				886	}
				887
				888	static void __exit zswap_debugfs_exit(void)
				889	{
				890	debugfs_remove_recursive(zswap_debugfs_root);
				891	}
				892	#else
				893	static int __init zswap_debugfs_init(void)
				894	{
				895	return 0;
				896	}
				897
				898	static void __exit zswap_debugfs_exit(void) { }
				899	#endif
				900
				901	/*********************************
				902	* module init and exit
				903	**********************************/
				904	static int __init init_zswap(void)
				905	{
				906	if (!zswap_enabled)
				907	return 0;
				908
				909	pr_info("loading zswap\n");
				910	if (zswap_entry_cache_create()) {
				911	pr_err("entry cache creation failed\n");
				912	goto error;
				913	}
				914	if (zswap_comp_init()) {
				915	pr_err("compressor initialization failed\n");
				916	goto compfail;
				917	}
				918	if (zswap_cpu_init()) {
				919	pr_err("per-cpu initialization failed\n");
				920	goto pcpufail;
				921	}
				922	frontswap_register_ops(&zswap_frontswap_ops);
				923	if (zswap_debugfs_init())
				924	pr_warn("debugfs initialization failed\n");
				925	return 0;
				926	pcpufail:
				927	zswap_comp_exit();
				928	compfail:
				929	zswap_entry_cache_destory();
				930	error:
				931	return -ENOMEM;
				932	}
				933	/* must be late so crypto has time to come up */
				934	late_initcall(init_zswap);
				935
				936	MODULE_LICENSE("GPL");
				937	MODULE_AUTHOR("Seth Jennings <sjenning@linux.vnet.ibm.com>");
				938	MODULE_DESCRIPTION("Compressed cache for swap pages");