Blame - drivers/md/dm-thin.c - kernel/msm-4.9

blob: 7ca2bf2aafaa4a282f56b7ab9d5f8ceec73130f0 [file] [log] [blame]

Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	1	/*
				2	* Copyright (C) 2011 Red Hat UK.
				3	*
				4	* This file is released under the GPL.
				5	*/
				6
				7	#include "dm-thin-metadata.h"
				8
				9	#include <linux/device-mapper.h>
				10	#include <linux/dm-io.h>
				11	#include <linux/dm-kcopyd.h>
				12	#include <linux/list.h>
				13	#include <linux/init.h>
				14	#include <linux/module.h>
				15	#include <linux/slab.h>
				16
				17	#define DM_MSG_PREFIX "thin"
				18
				19	/*
				20	* Tunable constants
				21	*/
				22	#define ENDIO_HOOK_POOL_SIZE 10240
				23	#define DEFERRED_SET_SIZE 64
				24	#define MAPPING_POOL_SIZE 1024
				25	#define PRISON_CELLS 1024
Joe Thornber	905e51b	2012-03-28 18:41:27 +0100	[diff] [blame]	26	#define COMMIT_PERIOD HZ
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	27
				28	/*
				29	* The block size of the device holding pool data must be
				30	* between 64KB and 1GB.
				31	*/
				32	#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (64 * 1024 >> SECTOR_SHIFT)
				33	#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)
				34
				35	/*
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	36	* Device id is restricted to 24 bits.
				37	*/
				38	#define MAX_DEV_ID ((1 << 24) - 1)
				39
/*
 * How do we handle breaking sharing of data blocks?
 * =================================================
 *
 * We use a standard copy-on-write btree to store the mappings for the
 * devices (note I'm talking about copy-on-write of the metadata here, not
 * the data).  When you take an internal snapshot you clone the root node
 * of the origin btree.  After this there is no concept of an origin or a
 * snapshot.  They are just two device trees that happen to point to the
 * same data blocks.
 *
 * When we get a write in we decide if it's to a shared data block using
 * some timestamp magic.  If it is, we have to break sharing.
 *
 * Let's say we write to a shared block in what was the origin.  The
 * steps are:
 *
 * i) plug io further to this physical block. (see bio_prison code).
 *
 * ii) quiesce any read io to that shared data block.  Obviously
 * including all devices that share this block.  (see deferred_set code)
 *
 * iii) copy the data block to a newly allocated block.  This step can be
 * skipped if the io covers the whole block. (schedule_copy).
 *
 * iv) insert the new mapping into the origin's btree
 * (process_prepared_mapping).  This act of inserting breaks some
 * sharing of btree nodes between the two devices.  Breaking sharing only
 * affects the btree of that specific device.  Btrees for the other
 * devices that share the block never change.  The btree for the origin
 * device as it was after the last commit is untouched, i.e. we're using
 * persistent data structures in the functional programming sense.
 *
 * v) unplug io to this physical block, including the io that triggered
 * the breaking of sharing.
 *
 * Steps (ii) and (iii) occur in parallel.
 *
 * The metadata _doesn't_ need to be committed before the io continues.  We
 * get away with this because the io is always written to a _new_ block.
 * If there's a crash, then:
 *
 * - The origin mapping will point to the old origin block (the shared
 * one).  This will contain the data as it was before the io that triggered
 * the breaking of sharing came in.
 *
 * - The snap mapping still points to the old block.  As it would after
 * the commit.
 *
 * The downside of this scheme is the timestamp magic isn't perfect, and
 * will continue to think that the data block in the snapshot device is
 * shared even after the write to the origin has broken sharing.  I suspect
 * data blocks will typically be shared by many different devices, so we're
 * breaking sharing n + 1 times, rather than n, where n is the number of
 * devices that reference this data block.  At the moment I think the
 * benefits far, far outweigh the disadvantages.
 */
				97
/*----------------------------------------------------------------*/
				99
				100	/*
				101	* Sometimes we can't deal with a bio straight away. We put them in prison
				102	* where they can't cause any mischief. Bios are put in a cell identified
				103	* by a key, multiple bios can be in the same cell. When the cell is
				104	* subsequently unlocked the bios become available.
				105	*/
				106	struct bio_prison;
				107
				108	struct cell_key {
				109	int virtual;
				110	dm_thin_id dev;
				111	dm_block_t block;
				112	};
				113
				114	struct cell {
				115	struct hlist_node list;
				116	struct bio_prison *prison;
				117	struct cell_key key;
Joe Thornber	6f94a4c	2012-03-28 18:41:23 +0100	[diff] [blame]	118	struct bio *holder;
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	119	struct bio_list bios;
				120	};
				121
				122	struct bio_prison {
				123	spinlock_t lock;
				124	mempool_t *cell_pool;
				125
				126	unsigned nr_buckets;
				127	unsigned hash_mask;
				128	struct hlist_head *cells;
				129	};
				130
				131	static uint32_t calc_nr_buckets(unsigned nr_cells)
				132	{
				133	uint32_t n = 128;
				134
				135	nr_cells /= 4;
				136	nr_cells = min(nr_cells, 8192u);
				137
				138	while (n < nr_cells)
				139	n <<= 1;
				140
				141	return n;
				142	}
				143
				144	/*
				145	* @nr_cells should be the number of cells you want in use _concurrently_.
				146	* Don't confuse it with the number of distinct keys.
				147	*/
				148	static struct bio_prison *prison_create(unsigned nr_cells)
				149	{
				150	unsigned i;
				151	uint32_t nr_buckets = calc_nr_buckets(nr_cells);
				152	size_t len = sizeof(struct bio_prison) +
				153	(sizeof(struct hlist_head) * nr_buckets);
				154	struct bio_prison *prison = kmalloc(len, GFP_KERNEL);
				155
				156	if (!prison)
				157	return NULL;
				158
				159	spin_lock_init(&prison->lock);
				160	prison->cell_pool = mempool_create_kmalloc_pool(nr_cells,
				161	sizeof(struct cell));
				162	if (!prison->cell_pool) {
				163	kfree(prison);
				164	return NULL;
				165	}
				166
				167	prison->nr_buckets = nr_buckets;
				168	prison->hash_mask = nr_buckets - 1;
				169	prison->cells = (struct hlist_head *) (prison + 1);
				170	for (i = 0; i < nr_buckets; i++)
				171	INIT_HLIST_HEAD(prison->cells + i);
				172
				173	return prison;
				174	}
				175
				176	static void prison_destroy(struct bio_prison *prison)
				177	{
				178	mempool_destroy(prison->cell_pool);
				179	kfree(prison);
				180	}
				181
				182	static uint32_t hash_key(struct bio_prison prison, struct cell_key key)
				183	{
				184	const unsigned long BIG_PRIME = 4294967291UL;
				185	uint64_t hash = key->block * BIG_PRIME;
				186
				187	return (uint32_t) (hash & prison->hash_mask);
				188	}
				189
				190	static int keys_equal(struct cell_key lhs, struct cell_key rhs)
				191	{
				192	return (lhs->virtual == rhs->virtual) &&
				193	(lhs->dev == rhs->dev) &&
				194	(lhs->block == rhs->block);
				195	}
				196
				197	static struct cell __search_bucket(struct hlist_head bucket,
				198	struct cell_key *key)
				199	{
				200	struct cell *cell;
				201	struct hlist_node *tmp;
				202
				203	hlist_for_each_entry(cell, tmp, bucket, list)
				204	if (keys_equal(&cell->key, key))
				205	return cell;
				206
				207	return NULL;
				208	}
				209
				210	/*
				211	* This may block if a new cell needs allocating. You must ensure that
				212	* cells will be unlocked even if the calling thread is blocked.
				213	*
Joe Thornber	6f94a4c	2012-03-28 18:41:23 +0100	[diff] [blame]	214	* Returns 1 if the cell was already held, 0 if @inmate is the new holder.
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	215	*/
				216	static int bio_detain(struct bio_prison prison, struct cell_key key,
				217	struct bio inmate, struct cell *ref)
				218	{
Joe Thornber	6f94a4c	2012-03-28 18:41:23 +0100	[diff] [blame]	219	int r = 1;
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	220	unsigned long flags;
				221	uint32_t hash = hash_key(prison, key);
Joe Thornber	6f94a4c	2012-03-28 18:41:23 +0100	[diff] [blame]	222	struct cell cell, cell2;
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	223
				224	BUG_ON(hash > prison->nr_buckets);
				225
				226	spin_lock_irqsave(&prison->lock, flags);
Joe Thornber	6f94a4c	2012-03-28 18:41:23 +0100	[diff] [blame]	227
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	228	cell = __search_bucket(prison->cells + hash, key);
Joe Thornber	6f94a4c	2012-03-28 18:41:23 +0100	[diff] [blame]	229	if (cell) {
				230	bio_list_add(&cell->bios, inmate);
				231	goto out;
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	232	}
				233
Joe Thornber	6f94a4c	2012-03-28 18:41:23 +0100	[diff] [blame]	234	/*
				235	* Allocate a new cell
				236	*/
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	237	spin_unlock_irqrestore(&prison->lock, flags);
Joe Thornber	6f94a4c	2012-03-28 18:41:23 +0100	[diff] [blame]	238	cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO);
				239	spin_lock_irqsave(&prison->lock, flags);
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	240
Joe Thornber	6f94a4c	2012-03-28 18:41:23 +0100	[diff] [blame]	241	/*
				242	* We've been unlocked, so we have to double check that
				243	* nobody else has inserted this cell in the meantime.
				244	*/
				245	cell = __search_bucket(prison->cells + hash, key);
				246	if (cell) {
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	247	mempool_free(cell2, prison->cell_pool);
Joe Thornber	6f94a4c	2012-03-28 18:41:23 +0100	[diff] [blame]	248	bio_list_add(&cell->bios, inmate);
				249	goto out;
				250	}
				251
				252	/*
				253	* Use new cell.
				254	*/
				255	cell = cell2;
				256
				257	cell->prison = prison;
				258	memcpy(&cell->key, key, sizeof(cell->key));
				259	cell->holder = inmate;
				260	bio_list_init(&cell->bios);
				261	hlist_add_head(&cell->list, prison->cells + hash);
				262
				263	r = 0;
				264
				265	out:
				266	spin_unlock_irqrestore(&prison->lock, flags);
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	267
				268	*ref = cell;
				269
				270	return r;
				271	}
				272
				273	/*
				274	* @inmates must have been initialised prior to this call
				275	*/
				276	static void __cell_release(struct cell cell, struct bio_list inmates)
				277	{
				278	struct bio_prison *prison = cell->prison;
				279
				280	hlist_del(&cell->list);
				281
Joe Thornber	6f94a4c	2012-03-28 18:41:23 +0100	[diff] [blame]	282	bio_list_add(inmates, cell->holder);
				283	bio_list_merge(inmates, &cell->bios);
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	284
				285	mempool_free(cell, prison->cell_pool);
				286	}
				287
				288	static void cell_release(struct cell cell, struct bio_list bios)
				289	{
				290	unsigned long flags;
				291	struct bio_prison *prison = cell->prison;
				292
				293	spin_lock_irqsave(&prison->lock, flags);
				294	__cell_release(cell, bios);
				295	spin_unlock_irqrestore(&prison->lock, flags);
				296	}
				297
				298	/*
				299	* There are a couple of places where we put a bio into a cell briefly
				300	* before taking it out again. In these situations we know that no other
				301	* bio may be in the cell. This function releases the cell, and also does
				302	* a sanity check.
				303	*/
Joe Thornber	6f94a4c	2012-03-28 18:41:23 +0100	[diff] [blame]	304	static void __cell_release_singleton(struct cell cell, struct bio bio)
				305	{
				306	hlist_del(&cell->list);
				307	BUG_ON(cell->holder != bio);
				308	BUG_ON(!bio_list_empty(&cell->bios));
				309	}
				310
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	311	static void cell_release_singleton(struct cell cell, struct bio bio)
				312	{
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	313	unsigned long flags;
Joe Thornber	6f94a4c	2012-03-28 18:41:23 +0100	[diff] [blame]	314	struct bio_prison *prison = cell->prison;
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	315
				316	spin_lock_irqsave(&prison->lock, flags);
Joe Thornber	6f94a4c	2012-03-28 18:41:23 +0100	[diff] [blame]	317	__cell_release_singleton(cell, bio);
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	318	spin_unlock_irqrestore(&prison->lock, flags);
Joe Thornber	6f94a4c	2012-03-28 18:41:23 +0100	[diff] [blame]	319	}
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	320
Joe Thornber	6f94a4c	2012-03-28 18:41:23 +0100	[diff] [blame]	321	/*
				322	* Sometimes we don't want the holder, just the additional bios.
				323	*/
				324	static void __cell_release_no_holder(struct cell cell, struct bio_list inmates)
				325	{
				326	struct bio_prison *prison = cell->prison;
				327
				328	hlist_del(&cell->list);
				329	bio_list_merge(inmates, &cell->bios);
				330
				331	mempool_free(cell, prison->cell_pool);
				332	}
				333
				334	static void cell_release_no_holder(struct cell cell, struct bio_list inmates)
				335	{
				336	unsigned long flags;
				337	struct bio_prison *prison = cell->prison;
				338
				339	spin_lock_irqsave(&prison->lock, flags);
				340	__cell_release_no_holder(cell, inmates);
				341	spin_unlock_irqrestore(&prison->lock, flags);
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	342	}
				343
				344	static void cell_error(struct cell *cell)
				345	{
				346	struct bio_prison *prison = cell->prison;
				347	struct bio_list bios;
				348	struct bio *bio;
				349	unsigned long flags;
				350
				351	bio_list_init(&bios);
				352
				353	spin_lock_irqsave(&prison->lock, flags);
				354	__cell_release(cell, &bios);
				355	spin_unlock_irqrestore(&prison->lock, flags);
				356
				357	while ((bio = bio_list_pop(&bios)))
				358	bio_io_error(bio);
				359	}
				360
/*----------------------------------------------------------------*/
				362
				363	/*
				364	* We use the deferred set to keep track of pending reads to shared blocks.
				365	* We do this to ensure the new mapping caused by a write isn't performed
				366	* until these prior reads have completed. Otherwise the insertion of the
				367	* new mapping could free the old block that the read bios are mapped to.
				368	*/
				369
				370	struct deferred_set;
				371	struct deferred_entry {
				372	struct deferred_set *ds;
				373	unsigned count;
				374	struct list_head work_items;
				375	};
				376
				377	struct deferred_set {
				378	spinlock_t lock;
				379	unsigned current_entry;
				380	unsigned sweeper;
				381	struct deferred_entry entries[DEFERRED_SET_SIZE];
				382	};
				383
				384	static void ds_init(struct deferred_set *ds)
				385	{
				386	int i;
				387
				388	spin_lock_init(&ds->lock);
				389	ds->current_entry = 0;
				390	ds->sweeper = 0;
				391	for (i = 0; i < DEFERRED_SET_SIZE; i++) {
				392	ds->entries[i].ds = ds;
				393	ds->entries[i].count = 0;
				394	INIT_LIST_HEAD(&ds->entries[i].work_items);
				395	}
				396	}
				397
				398	static struct deferred_entry ds_inc(struct deferred_set ds)
				399	{
				400	unsigned long flags;
				401	struct deferred_entry *entry;
				402
				403	spin_lock_irqsave(&ds->lock, flags);
				404	entry = ds->entries + ds->current_entry;
				405	entry->count++;
				406	spin_unlock_irqrestore(&ds->lock, flags);
				407
				408	return entry;
				409	}
				410
				411	static unsigned ds_next(unsigned index)
				412	{
				413	return (index + 1) % DEFERRED_SET_SIZE;
				414	}
				415
				416	static void __sweep(struct deferred_set ds, struct list_head head)
				417	{
				418	while ((ds->sweeper != ds->current_entry) &&
				419	!ds->entries[ds->sweeper].count) {
				420	list_splice_init(&ds->entries[ds->sweeper].work_items, head);
				421	ds->sweeper = ds_next(ds->sweeper);
				422	}
				423
				424	if ((ds->sweeper == ds->current_entry) && !ds->entries[ds->sweeper].count)
				425	list_splice_init(&ds->entries[ds->sweeper].work_items, head);
				426	}
				427
				428	static void ds_dec(struct deferred_entry entry, struct list_head head)
				429	{
				430	unsigned long flags;
				431
				432	spin_lock_irqsave(&entry->ds->lock, flags);
				433	BUG_ON(!entry->count);
				434	--entry->count;
				435	__sweep(entry->ds, head);
				436	spin_unlock_irqrestore(&entry->ds->lock, flags);
				437	}
				438
				439	/*
				440	* Returns 1 if deferred or 0 if no pending items to delay job.
				441	*/
				442	static int ds_add_work(struct deferred_set ds, struct list_head work)
				443	{
				444	int r = 1;
				445	unsigned long flags;
				446	unsigned next_entry;
				447
				448	spin_lock_irqsave(&ds->lock, flags);
				449	if ((ds->sweeper == ds->current_entry) &&
				450	!ds->entries[ds->current_entry].count)
				451	r = 0;
				452	else {
				453	list_add(work, &ds->entries[ds->current_entry].work_items);
				454	next_entry = ds_next(ds->current_entry);
				455	if (!ds->entries[next_entry].count)
				456	ds->current_entry = next_entry;
				457	}
				458	spin_unlock_irqrestore(&ds->lock, flags);
				459
				460	return r;
				461	}
				462
/*----------------------------------------------------------------*/
				464
				465	/*
				466	* Key building.
				467	*/
				468	static void build_data_key(struct dm_thin_device *td,
				469	dm_block_t b, struct cell_key *key)
				470	{
				471	key->virtual = 0;
				472	key->dev = dm_thin_dev_id(td);
				473	key->block = b;
				474	}
				475
				476	static void build_virtual_key(struct dm_thin_device *td, dm_block_t b,
				477	struct cell_key *key)
				478	{
				479	key->virtual = 1;
				480	key->dev = dm_thin_dev_id(td);
				481	key->block = b;
				482	}
				483
/*----------------------------------------------------------------*/
				485
				486	/*
				487	* A pool device ties together a metadata device and a data device. It
				488	* also provides the interface for creating and destroying internal
				489	* devices.
				490	*/
				491	struct new_mapping;
				492	struct pool {
				493	struct list_head list;
				494	struct dm_target ti; / Only set if a pool target is bound */
				495
				496	struct mapped_device *pool_md;
				497	struct block_device *md_dev;
				498	struct dm_pool_metadata *pmd;
				499
				500	uint32_t sectors_per_block;
				501	unsigned block_shift;
				502	dm_block_t offset_mask;
				503	dm_block_t low_water_blocks;
				504
				505	unsigned zero_new_blocks:1;
				506	unsigned low_water_triggered:1; /* A dm event has been sent */
				507	unsigned no_free_space:1; /* A -ENOSPC warning has been issued */
				508
				509	struct bio_prison *prison;
				510	struct dm_kcopyd_client *copier;
				511
				512	struct workqueue_struct *wq;
				513	struct work_struct worker;
Joe Thornber	905e51b	2012-03-28 18:41:27 +0100	[diff] [blame]	514	struct delayed_work waker;
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	515
				516	unsigned ref_count;
Joe Thornber	905e51b	2012-03-28 18:41:27 +0100	[diff] [blame]	517	unsigned long last_commit_jiffies;
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	518
				519	spinlock_t lock;
				520	struct bio_list deferred_bios;
				521	struct bio_list deferred_flush_bios;
				522	struct list_head prepared_mappings;
				523
				524	struct bio_list retry_on_resume_list;
				525
				526	struct deferred_set ds; /* FIXME: move to thin_c */
				527
				528	struct new_mapping *next_mapping;
				529	mempool_t *mapping_pool;
				530	mempool_t *endio_hook_pool;
				531	};
				532
				533	/*
				534	* Target context for a pool.
				535	*/
				536	struct pool_c {
				537	struct dm_target *ti;
				538	struct pool *pool;
				539	struct dm_dev *data_dev;
				540	struct dm_dev *metadata_dev;
				541	struct dm_target_callbacks callbacks;
				542
				543	dm_block_t low_water_blocks;
				544	unsigned zero_new_blocks:1;
				545	};
				546
				547	/*
				548	* Target context for a thin.
				549	*/
				550	struct thin_c {
				551	struct dm_dev *pool_dev;
Joe Thornber	2dd9c25	2012-03-28 18:41:28 +0100	[diff] [blame]	552	struct dm_dev *origin_dev;
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	553	dm_thin_id dev_id;
				554
				555	struct pool *pool;
				556	struct dm_thin_device *td;
				557	};
				558
/*----------------------------------------------------------------*/
				560
				561	/*
				562	* A global list of pools that uses a struct mapped_device as a key.
				563	*/
				564	static struct dm_thin_pool_table {
				565	struct mutex mutex;
				566	struct list_head pools;
				567	} dm_thin_pool_table;
				568
				569	static void pool_table_init(void)
				570	{
				571	mutex_init(&dm_thin_pool_table.mutex);
				572	INIT_LIST_HEAD(&dm_thin_pool_table.pools);
				573	}
				574
				575	static void __pool_table_insert(struct pool *pool)
				576	{
				577	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
				578	list_add(&pool->list, &dm_thin_pool_table.pools);
				579	}
				580
				581	static void __pool_table_remove(struct pool *pool)
				582	{
				583	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
				584	list_del(&pool->list);
				585	}
				586
				587	static struct pool __pool_table_lookup(struct mapped_device md)
				588	{
				589	struct pool pool = NULL, tmp;
				590
				591	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
				592
				593	list_for_each_entry(tmp, &dm_thin_pool_table.pools, list) {
				594	if (tmp->pool_md == md) {
				595	pool = tmp;
				596	break;
				597	}
				598	}
				599
				600	return pool;
				601	}
				602
				603	static struct pool __pool_table_lookup_metadata_dev(struct block_device md_dev)
				604	{
				605	struct pool pool = NULL, tmp;
				606
				607	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
				608
				609	list_for_each_entry(tmp, &dm_thin_pool_table.pools, list) {
				610	if (tmp->md_dev == md_dev) {
				611	pool = tmp;
				612	break;
				613	}
				614	}
				615
				616	return pool;
				617	}
				618
/*----------------------------------------------------------------*/
				620
				621	static void __requeue_bio_list(struct thin_c tc, struct bio_list master)
				622	{
				623	struct bio *bio;
				624	struct bio_list bios;
				625
				626	bio_list_init(&bios);
				627	bio_list_merge(&bios, master);
				628	bio_list_init(master);
				629
				630	while ((bio = bio_list_pop(&bios))) {
				631	if (dm_get_mapinfo(bio)->ptr == tc)
				632	bio_endio(bio, DM_ENDIO_REQUEUE);
				633	else
				634	bio_list_add(master, bio);
				635	}
				636	}
				637
				638	static void requeue_io(struct thin_c *tc)
				639	{
				640	struct pool *pool = tc->pool;
				641	unsigned long flags;
				642
				643	spin_lock_irqsave(&pool->lock, flags);
				644	__requeue_bio_list(tc, &pool->deferred_bios);
				645	__requeue_bio_list(tc, &pool->retry_on_resume_list);
				646	spin_unlock_irqrestore(&pool->lock, flags);
				647	}
				648
				649	/*
				650	* This section of code contains the logic for processing a thin device's IO.
				651	* Much of the code depends on pool object resources (lists, workqueues, etc)
				652	* but most is exclusively called from the thin target rather than the thin-pool
				653	* target.
				654	*/
				655
				656	static dm_block_t get_bio_block(struct thin_c tc, struct bio bio)
				657	{
				658	return bio->bi_sector >> tc->pool->block_shift;
				659	}
				660
				661	static void remap(struct thin_c tc, struct bio bio, dm_block_t block)
				662	{
				663	struct pool *pool = tc->pool;
				664
				665	bio->bi_bdev = tc->pool_dev->bdev;
				666	bio->bi_sector = (block << pool->block_shift) +
				667	(bio->bi_sector & pool->offset_mask);
				668	}
				669
Joe Thornber	2dd9c25	2012-03-28 18:41:28 +0100	[diff] [blame]	670	static void remap_to_origin(struct thin_c tc, struct bio bio)
				671	{
				672	bio->bi_bdev = tc->origin_dev->bdev;
				673	}
				674
				675	static void issue(struct thin_c tc, struct bio bio)
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	676	{
				677	struct pool *pool = tc->pool;
				678	unsigned long flags;
				679
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	680	/*
				681	* Batch together any FUA/FLUSH bios we find and then issue
				682	* a single commit for them in process_deferred_bios().
				683	*/
				684	if (bio->bi_rw & (REQ_FLUSH \| REQ_FUA)) {
				685	spin_lock_irqsave(&pool->lock, flags);
				686	bio_list_add(&pool->deferred_flush_bios, bio);
				687	spin_unlock_irqrestore(&pool->lock, flags);
				688	} else
				689	generic_make_request(bio);
				690	}
				691
/*
 * Point @bio at the origin device and issue it.
 */
static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio)
{
	remap_to_origin(tc, bio);
	issue(tc, bio);
}
				697
				698	static void remap_and_issue(struct thin_c tc, struct bio bio,
				699	dm_block_t block)
				700	{
				701	remap(tc, bio, block);
				702	issue(tc, bio);
				703	}
				704
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	705	/*
				706	* wake_worker() is used when new work is queued and when pool_resume is
				707	* ready to continue deferred IO processing.
				708	*/
				709	static void wake_worker(struct pool *pool)
				710	{
				711	queue_work(pool->wq, &pool->worker);
				712	}
				713
/*----------------------------------------------------------------*/
				715
				716	/*
				717	* Bio endio functions.
				718	*/
				719	struct endio_hook {
				720	struct thin_c *tc;
				721	bio_end_io_t *saved_bi_end_io;
				722	struct deferred_entry *entry;
				723	};
				724
				725	struct new_mapping {
				726	struct list_head list;
				727
				728	int prepared;
				729
				730	struct thin_c *tc;
				731	dm_block_t virt_block;
				732	dm_block_t data_block;
				733	struct cell *cell;
				734	int err;
				735
				736	/*
				737	* If the bio covers the whole area of a block then we can avoid
				738	* zeroing or copying. Instead this bio is hooked. The bio will
				739	* still be in the cell, so care has to be taken to avoid issuing
				740	* the bio twice.
				741	*/
				742	struct bio *bio;
				743	bio_end_io_t *saved_bi_end_io;
				744	};
				745
				746	static void __maybe_add_mapping(struct new_mapping *m)
				747	{
				748	struct pool *pool = m->tc->pool;
				749
				750	if (list_empty(&m->list) && m->prepared) {
				751	list_add(&m->list, &pool->prepared_mappings);
				752	wake_worker(pool);
				753	}
				754	}
				755
				756	static void copy_complete(int read_err, unsigned long write_err, void *context)
				757	{
				758	unsigned long flags;
				759	struct new_mapping *m = context;
				760	struct pool *pool = m->tc->pool;
				761
				762	m->err = read_err \|\| write_err ? -EIO : 0;
				763
				764	spin_lock_irqsave(&pool->lock, flags);
				765	m->prepared = 1;
				766	__maybe_add_mapping(m);
				767	spin_unlock_irqrestore(&pool->lock, flags);
				768	}
				769
				770	static void overwrite_endio(struct bio *bio, int err)
				771	{
				772	unsigned long flags;
				773	struct new_mapping *m = dm_get_mapinfo(bio)->ptr;
				774	struct pool *pool = m->tc->pool;
				775
				776	m->err = err;
				777
				778	spin_lock_irqsave(&pool->lock, flags);
				779	m->prepared = 1;
				780	__maybe_add_mapping(m);
				781	spin_unlock_irqrestore(&pool->lock, flags);
				782	}
				783
				784	static void shared_read_endio(struct bio *bio, int err)
				785	{
				786	struct list_head mappings;
				787	struct new_mapping m, tmp;
				788	struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
				789	unsigned long flags;
				790	struct pool *pool = h->tc->pool;
				791
				792	bio->bi_end_io = h->saved_bi_end_io;
				793	bio_endio(bio, err);
				794
				795	INIT_LIST_HEAD(&mappings);
				796	ds_dec(h->entry, &mappings);
				797
				798	spin_lock_irqsave(&pool->lock, flags);
				799	list_for_each_entry_safe(m, tmp, &mappings, list) {
				800	list_del(&m->list);
				801	INIT_LIST_HEAD(&m->list);
				802	__maybe_add_mapping(m);
				803	}
				804	spin_unlock_irqrestore(&pool->lock, flags);
				805
				806	mempool_free(h, pool->endio_hook_pool);
				807	}
				808
/*----------------------------------------------------------------*/
				810
				811	/*
				812	* Workqueue.
				813	*/
				814
				815	/*
				816	* Prepared mapping jobs.
				817	*/
				818
				819	/*
				820	* This sends the bios in the cell back to the deferred_bios list.
				821	*/
				822	static void cell_defer(struct thin_c tc, struct cell cell,
				823	dm_block_t data_block)
				824	{
				825	struct pool *pool = tc->pool;
				826	unsigned long flags;
				827
				828	spin_lock_irqsave(&pool->lock, flags);
				829	cell_release(cell, &pool->deferred_bios);
				830	spin_unlock_irqrestore(&tc->pool->lock, flags);
				831
				832	wake_worker(pool);
				833	}
				834
				835	/*
				836	* Same as cell_defer above, except it omits one particular detainee,
				837	* a write bio that covers the block and has already been processed.
				838	*/
Joe Thornber	6f94a4c	2012-03-28 18:41:23 +0100	[diff] [blame]	839	static void cell_defer_except(struct thin_c tc, struct cell cell)
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	840	{
				841	struct bio_list bios;
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	842	struct pool *pool = tc->pool;
				843	unsigned long flags;
				844
				845	bio_list_init(&bios);
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	846
				847	spin_lock_irqsave(&pool->lock, flags);
Joe Thornber	6f94a4c	2012-03-28 18:41:23 +0100	[diff] [blame]	848	cell_release_no_holder(cell, &pool->deferred_bios);
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	849	spin_unlock_irqrestore(&pool->lock, flags);
				850
				851	wake_worker(pool);
				852	}
				853
				854	static void process_prepared_mapping(struct new_mapping *m)
				855	{
				856	struct thin_c *tc = m->tc;
				857	struct bio *bio;
				858	int r;
				859
				860	bio = m->bio;
				861	if (bio)
				862	bio->bi_end_io = m->saved_bi_end_io;
				863
				864	if (m->err) {
				865	cell_error(m->cell);
				866	return;
				867	}
				868
				869	/*
				870	* Commit the prepared block into the mapping btree.
				871	* Any I/O for this block arriving after this point will get
				872	* remapped to it directly.
				873	*/
				874	r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block);
				875	if (r) {
				876	DMERR("dm_thin_insert_block() failed");
				877	cell_error(m->cell);
				878	return;
				879	}
				880
				881	/*
				882	* Release any bios held while the block was being provisioned.
				883	* If we are processing a write bio that completely covers the block,
				884	* we already processed it so can ignore it now when processing
				885	* the bios in the cell.
				886	*/
				887	if (bio) {
Joe Thornber	6f94a4c	2012-03-28 18:41:23 +0100	[diff] [blame]	888	cell_defer_except(tc, m->cell);
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	889	bio_endio(bio, 0);
				890	} else
				891	cell_defer(tc, m->cell, m->data_block);
				892
				893	list_del(&m->list);
				894	mempool_free(m, tc->pool->mapping_pool);
				895	}
				896
				897	static void process_prepared_mappings(struct pool *pool)
				898	{
				899	unsigned long flags;
				900	struct list_head maps;
				901	struct new_mapping m, tmp;
				902
				903	INIT_LIST_HEAD(&maps);
				904	spin_lock_irqsave(&pool->lock, flags);
				905	list_splice_init(&pool->prepared_mappings, &maps);
				906	spin_unlock_irqrestore(&pool->lock, flags);
				907
				908	list_for_each_entry_safe(m, tmp, &maps, list)
				909	process_prepared_mapping(m);
				910	}
				911
				912	/*
				913	* Deferred bio jobs.
				914	*/
				915	static int io_overwrites_block(struct pool pool, struct bio bio)
				916	{
				917	return ((bio_data_dir(bio) == WRITE) &&
				918	!(bio->bi_sector & pool->offset_mask)) &&
				919	(bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT));
				920	}
				921
				922	static void save_and_set_endio(struct bio bio, bio_end_io_t *save,
				923	bio_end_io_t *fn)
				924	{
				925	*save = bio->bi_end_io;
				926	bio->bi_end_io = fn;
				927	}
				928
				929	static int ensure_next_mapping(struct pool *pool)
				930	{
				931	if (pool->next_mapping)
				932	return 0;
				933
				934	pool->next_mapping = mempool_alloc(pool->mapping_pool, GFP_ATOMIC);
				935
				936	return pool->next_mapping ? 0 : -ENOMEM;
				937	}
				938
				939	static struct new_mapping get_next_mapping(struct pool pool)
				940	{
				941	struct new_mapping *r = pool->next_mapping;
				942
				943	BUG_ON(!pool->next_mapping);
				944
				945	pool->next_mapping = NULL;
				946
				947	return r;
				948	}
				949
				950	static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
Joe Thornber	2dd9c25	2012-03-28 18:41:28 +0100	[diff] [blame]	951	struct dm_dev *origin, dm_block_t data_origin,
				952	dm_block_t data_dest,
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	953	struct cell cell, struct bio bio)
				954	{
				955	int r;
				956	struct pool *pool = tc->pool;
				957	struct new_mapping *m = get_next_mapping(pool);
				958
				959	INIT_LIST_HEAD(&m->list);
				960	m->prepared = 0;
				961	m->tc = tc;
				962	m->virt_block = virt_block;
				963	m->data_block = data_dest;
				964	m->cell = cell;
				965	m->err = 0;
				966	m->bio = NULL;
				967
				968	ds_add_work(&pool->ds, &m->list);
				969
				970	/*
				971	* IO to pool_dev remaps to the pool target's data_dev.
				972	*
				973	* If the whole block of data is being overwritten, we can issue the
				974	* bio immediately. Otherwise we use kcopyd to clone the data first.
				975	*/
				976	if (io_overwrites_block(pool, bio)) {
				977	m->bio = bio;
				978	save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
				979	dm_get_mapinfo(bio)->ptr = m;
				980	remap_and_issue(tc, bio, data_dest);
				981	} else {
				982	struct dm_io_region from, to;
				983
Joe Thornber	2dd9c25	2012-03-28 18:41:28 +0100	[diff] [blame]	984	from.bdev = origin->bdev;
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	985	from.sector = data_origin * pool->sectors_per_block;
				986	from.count = pool->sectors_per_block;
				987
				988	to.bdev = tc->pool_dev->bdev;
				989	to.sector = data_dest * pool->sectors_per_block;
				990	to.count = pool->sectors_per_block;
				991
				992	r = dm_kcopyd_copy(pool->copier, &from, 1, &to,
				993	0, copy_complete, m);
				994	if (r < 0) {
				995	mempool_free(m, pool->mapping_pool);
				996	DMERR("dm_kcopyd_copy() failed");
				997	cell_error(cell);
				998	}
				999	}
				1000	}
				1001
Joe Thornber	2dd9c25	2012-03-28 18:41:28 +0100	[diff] [blame]	1002	static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
				1003	dm_block_t data_origin, dm_block_t data_dest,
				1004	struct cell cell, struct bio bio)
				1005	{
				1006	schedule_copy(tc, virt_block, tc->pool_dev,
				1007	data_origin, data_dest, cell, bio);
				1008	}
				1009
				1010	static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
				1011	dm_block_t data_dest,
				1012	struct cell cell, struct bio bio)
				1013	{
				1014	schedule_copy(tc, virt_block, tc->origin_dev,
				1015	virt_block, data_dest, cell, bio);
				1016	}
				1017
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	1018	static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
				1019	dm_block_t data_block, struct cell *cell,
				1020	struct bio *bio)
				1021	{
				1022	struct pool *pool = tc->pool;
				1023	struct new_mapping *m = get_next_mapping(pool);
				1024
				1025	INIT_LIST_HEAD(&m->list);
				1026	m->prepared = 0;
				1027	m->tc = tc;
				1028	m->virt_block = virt_block;
				1029	m->data_block = data_block;
				1030	m->cell = cell;
				1031	m->err = 0;
				1032	m->bio = NULL;
				1033
				1034	/*
				1035	* If the whole block of data is being overwritten or we are not
				1036	* zeroing pre-existing data, we can issue the bio immediately.
				1037	* Otherwise we use kcopyd to zero the data first.
				1038	*/
				1039	if (!pool->zero_new_blocks)
				1040	process_prepared_mapping(m);
				1041
				1042	else if (io_overwrites_block(pool, bio)) {
				1043	m->bio = bio;
				1044	save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
				1045	dm_get_mapinfo(bio)->ptr = m;
				1046	remap_and_issue(tc, bio, data_block);
				1047
				1048	} else {
				1049	int r;
				1050	struct dm_io_region to;
				1051
				1052	to.bdev = tc->pool_dev->bdev;
				1053	to.sector = data_block * pool->sectors_per_block;
				1054	to.count = pool->sectors_per_block;
				1055
				1056	r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m);
				1057	if (r < 0) {
				1058	mempool_free(m, pool->mapping_pool);
				1059	DMERR("dm_kcopyd_zero() failed");
				1060	cell_error(cell);
				1061	}
				1062	}
				1063	}
				1064
				1065	static int alloc_data_block(struct thin_c tc, dm_block_t result)
				1066	{
				1067	int r;
				1068	dm_block_t free_blocks;
				1069	unsigned long flags;
				1070	struct pool *pool = tc->pool;
				1071
				1072	r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
				1073	if (r)
				1074	return r;
				1075
				1076	if (free_blocks <= pool->low_water_blocks && !pool->low_water_triggered) {
				1077	DMWARN("%s: reached low water mark, sending event.",
				1078	dm_device_name(pool->pool_md));
				1079	spin_lock_irqsave(&pool->lock, flags);
				1080	pool->low_water_triggered = 1;
				1081	spin_unlock_irqrestore(&pool->lock, flags);
				1082	dm_table_event(pool->ti->table);
				1083	}
				1084
				1085	if (!free_blocks) {
				1086	if (pool->no_free_space)
				1087	return -ENOSPC;
				1088	else {
				1089	/*
				1090	* Try to commit to see if that will free up some
				1091	* more space.
				1092	*/
				1093	r = dm_pool_commit_metadata(pool->pmd);
				1094	if (r) {
				1095	DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
				1096	__func__, r);
				1097	return r;
				1098	}
				1099
				1100	r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
				1101	if (r)
				1102	return r;
				1103
				1104	/*
				1105	* If we still have no space we set a flag to avoid
				1106	* doing all this checking and return -ENOSPC.
				1107	*/
				1108	if (!free_blocks) {
				1109	DMWARN("%s: no free space available.",
				1110	dm_device_name(pool->pool_md));
				1111	spin_lock_irqsave(&pool->lock, flags);
				1112	pool->no_free_space = 1;
				1113	spin_unlock_irqrestore(&pool->lock, flags);
				1114	return -ENOSPC;
				1115	}
				1116	}
				1117	}
				1118
				1119	r = dm_pool_alloc_data_block(pool->pmd, result);
				1120	if (r)
				1121	return r;
				1122
				1123	return 0;
				1124	}
				1125
				1126	/*
				1127	* If we have run out of space, queue bios until the device is
				1128	* resumed, presumably after having been reloaded with more space.
				1129	*/
				1130	static void retry_on_resume(struct bio *bio)
				1131	{
				1132	struct thin_c *tc = dm_get_mapinfo(bio)->ptr;
				1133	struct pool *pool = tc->pool;
				1134	unsigned long flags;
				1135
				1136	spin_lock_irqsave(&pool->lock, flags);
				1137	bio_list_add(&pool->retry_on_resume_list, bio);
				1138	spin_unlock_irqrestore(&pool->lock, flags);
				1139	}
				1140
				1141	static void no_space(struct cell *cell)
				1142	{
				1143	struct bio *bio;
				1144	struct bio_list bios;
				1145
				1146	bio_list_init(&bios);
				1147	cell_release(cell, &bios);
				1148
				1149	while ((bio = bio_list_pop(&bios)))
				1150	retry_on_resume(bio);
				1151	}
				1152
				1153	static void break_sharing(struct thin_c tc, struct bio bio, dm_block_t block,
				1154	struct cell_key *key,
				1155	struct dm_thin_lookup_result *lookup_result,
				1156	struct cell *cell)
				1157	{
				1158	int r;
				1159	dm_block_t data_block;
				1160
				1161	r = alloc_data_block(tc, &data_block);
				1162	switch (r) {
				1163	case 0:
Joe Thornber	2dd9c25	2012-03-28 18:41:28 +0100	[diff] [blame]	1164	schedule_internal_copy(tc, block, lookup_result->block,
				1165	data_block, cell, bio);
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	1166	break;
				1167
				1168	case -ENOSPC:
				1169	no_space(cell);
				1170	break;
				1171
				1172	default:
				1173	DMERR("%s: alloc_data_block() failed, error = %d", __func__, r);
				1174	cell_error(cell);
				1175	break;
				1176	}
				1177	}
				1178
				1179	static void process_shared_bio(struct thin_c tc, struct bio bio,
				1180	dm_block_t block,
				1181	struct dm_thin_lookup_result *lookup_result)
				1182	{
				1183	struct cell *cell;
				1184	struct pool *pool = tc->pool;
				1185	struct cell_key key;
				1186
				1187	/*
				1188	* If cell is already occupied, then sharing is already in the process
				1189	* of being broken so we have nothing further to do here.
				1190	*/
				1191	build_data_key(tc->td, lookup_result->block, &key);
				1192	if (bio_detain(pool->prison, &key, bio, &cell))
				1193	return;
				1194
				1195	if (bio_data_dir(bio) == WRITE)
				1196	break_sharing(tc, bio, block, &key, lookup_result, cell);
				1197	else {
				1198	struct endio_hook *h;
				1199	h = mempool_alloc(pool->endio_hook_pool, GFP_NOIO);
				1200
				1201	h->tc = tc;
				1202	h->entry = ds_inc(&pool->ds);
				1203	save_and_set_endio(bio, &h->saved_bi_end_io, shared_read_endio);
				1204	dm_get_mapinfo(bio)->ptr = h;
				1205
				1206	cell_release_singleton(cell, bio);
				1207	remap_and_issue(tc, bio, lookup_result->block);
				1208	}
				1209	}
				1210
				1211	static void provision_block(struct thin_c tc, struct bio bio, dm_block_t block,
				1212	struct cell *cell)
				1213	{
				1214	int r;
				1215	dm_block_t data_block;
				1216
				1217	/*
				1218	* Remap empty bios (flushes) immediately, without provisioning.
				1219	*/
				1220	if (!bio->bi_size) {
				1221	cell_release_singleton(cell, bio);
				1222	remap_and_issue(tc, bio, 0);
				1223	return;
				1224	}
				1225
				1226	/*
				1227	* Fill read bios with zeroes and complete them immediately.
				1228	*/
				1229	if (bio_data_dir(bio) == READ) {
				1230	zero_fill_bio(bio);
				1231	cell_release_singleton(cell, bio);
				1232	bio_endio(bio, 0);
				1233	return;
				1234	}
				1235
				1236	r = alloc_data_block(tc, &data_block);
				1237	switch (r) {
				1238	case 0:
Joe Thornber	2dd9c25	2012-03-28 18:41:28 +0100	[diff] [blame]	1239	if (tc->origin_dev)
				1240	schedule_external_copy(tc, block, data_block, cell, bio);
				1241	else
				1242	schedule_zero(tc, block, data_block, cell, bio);
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	1243	break;
				1244
				1245	case -ENOSPC:
				1246	no_space(cell);
				1247	break;
				1248
				1249	default:
				1250	DMERR("%s: alloc_data_block() failed, error = %d", __func__, r);
				1251	cell_error(cell);
				1252	break;
				1253	}
				1254	}
				1255
				1256	static void process_bio(struct thin_c tc, struct bio bio)
				1257	{
				1258	int r;
				1259	dm_block_t block = get_bio_block(tc, bio);
				1260	struct cell *cell;
				1261	struct cell_key key;
				1262	struct dm_thin_lookup_result lookup_result;
				1263
				1264	/*
				1265	* If cell is already occupied, then the block is already
				1266	* being provisioned so we have nothing further to do here.
				1267	*/
				1268	build_virtual_key(tc->td, block, &key);
				1269	if (bio_detain(tc->pool->prison, &key, bio, &cell))
				1270	return;
				1271
				1272	r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
				1273	switch (r) {
				1274	case 0:
				1275	/*
				1276	* We can release this cell now. This thread is the only
				1277	* one that puts bios into a cell, and we know there were
				1278	* no preceding bios.
				1279	*/
				1280	/*
				1281	* TODO: this will probably have to change when discard goes
				1282	* back in.
				1283	*/
				1284	cell_release_singleton(cell, bio);
				1285
				1286	if (lookup_result.shared)
				1287	process_shared_bio(tc, bio, block, &lookup_result);
				1288	else
				1289	remap_and_issue(tc, bio, lookup_result.block);
				1290	break;
				1291
				1292	case -ENODATA:
Joe Thornber	2dd9c25	2012-03-28 18:41:28 +0100	[diff] [blame]	1293	if (bio_data_dir(bio) == READ && tc->origin_dev) {
				1294	cell_release_singleton(cell, bio);
				1295	remap_to_origin_and_issue(tc, bio);
				1296	} else
				1297	provision_block(tc, bio, block, cell);
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	1298	break;
				1299
				1300	default:
				1301	DMERR("dm_thin_find_block() failed, error = %d", r);
				1302	bio_io_error(bio);
				1303	break;
				1304	}
				1305	}
				1306
Joe Thornber	905e51b	2012-03-28 18:41:27 +0100	[diff] [blame]	1307	static int need_commit_due_to_time(struct pool *pool)
				1308	{
				1309	return jiffies < pool->last_commit_jiffies \|\|
				1310	jiffies > pool->last_commit_jiffies + COMMIT_PERIOD;
				1311	}
				1312
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	1313	static void process_deferred_bios(struct pool *pool)
				1314	{
				1315	unsigned long flags;
				1316	struct bio *bio;
				1317	struct bio_list bios;
				1318	int r;
				1319
				1320	bio_list_init(&bios);
				1321
				1322	spin_lock_irqsave(&pool->lock, flags);
				1323	bio_list_merge(&bios, &pool->deferred_bios);
				1324	bio_list_init(&pool->deferred_bios);
				1325	spin_unlock_irqrestore(&pool->lock, flags);
				1326
				1327	while ((bio = bio_list_pop(&bios))) {
				1328	struct thin_c *tc = dm_get_mapinfo(bio)->ptr;
				1329	/*
				1330	* If we've got no free new_mapping structs, and processing
				1331	* this bio might require one, we pause until there are some
				1332	* prepared mappings to process.
				1333	*/
				1334	if (ensure_next_mapping(pool)) {
				1335	spin_lock_irqsave(&pool->lock, flags);
				1336	bio_list_merge(&pool->deferred_bios, &bios);
				1337	spin_unlock_irqrestore(&pool->lock, flags);
				1338
				1339	break;
				1340	}
				1341	process_bio(tc, bio);
				1342	}
				1343
				1344	/*
				1345	* If there are any deferred flush bios, we must commit
				1346	* the metadata before issuing them.
				1347	*/
				1348	bio_list_init(&bios);
				1349	spin_lock_irqsave(&pool->lock, flags);
				1350	bio_list_merge(&bios, &pool->deferred_flush_bios);
				1351	bio_list_init(&pool->deferred_flush_bios);
				1352	spin_unlock_irqrestore(&pool->lock, flags);
				1353
Joe Thornber	905e51b	2012-03-28 18:41:27 +0100	[diff] [blame]	1354	if (bio_list_empty(&bios) && !need_commit_due_to_time(pool))
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	1355	return;
				1356
				1357	r = dm_pool_commit_metadata(pool->pmd);
				1358	if (r) {
				1359	DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
				1360	__func__, r);
				1361	while ((bio = bio_list_pop(&bios)))
				1362	bio_io_error(bio);
				1363	return;
				1364	}
Joe Thornber	905e51b	2012-03-28 18:41:27 +0100	[diff] [blame]	1365	pool->last_commit_jiffies = jiffies;
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	1366
				1367	while ((bio = bio_list_pop(&bios)))
				1368	generic_make_request(bio);
				1369	}
				1370
				1371	static void do_worker(struct work_struct *ws)
				1372	{
				1373	struct pool *pool = container_of(ws, struct pool, worker);
				1374
				1375	process_prepared_mappings(pool);
				1376	process_deferred_bios(pool);
				1377	}
				1378
Joe Thornber	905e51b	2012-03-28 18:41:27 +0100	[diff] [blame]	1379	/*
				1380	* We want to commit periodically so that not too much
				1381	* unwritten data builds up.
				1382	*/
				1383	static void do_waker(struct work_struct *ws)
				1384	{
				1385	struct pool *pool = container_of(to_delayed_work(ws), struct pool, waker);
				1386	wake_worker(pool);
				1387	queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
				1388	}
				1389
/*----------------------------------------------------------------*/
				1391
				1392	/*
				1393	* Mapping functions.
				1394	*/
				1395
				1396	/*
				1397	* Called only while mapping a thin bio to hand it over to the workqueue.
				1398	*/
				1399	static void thin_defer_bio(struct thin_c tc, struct bio bio)
				1400	{
				1401	unsigned long flags;
				1402	struct pool *pool = tc->pool;
				1403
				1404	spin_lock_irqsave(&pool->lock, flags);
				1405	bio_list_add(&pool->deferred_bios, bio);
				1406	spin_unlock_irqrestore(&pool->lock, flags);
				1407
				1408	wake_worker(pool);
				1409	}
				1410
				1411	/*
				1412	* Non-blocking function called from the thin target's map function.
				1413	*/
				1414	static int thin_bio_map(struct dm_target ti, struct bio bio,
				1415	union map_info *map_context)
				1416	{
				1417	int r;
				1418	struct thin_c *tc = ti->private;
				1419	dm_block_t block = get_bio_block(tc, bio);
				1420	struct dm_thin_device *td = tc->td;
				1421	struct dm_thin_lookup_result result;
				1422
				1423	/*
				1424	* Save the thin context for easy access from the deferred bio later.
				1425	*/
				1426	map_context->ptr = tc;
				1427
				1428	if (bio->bi_rw & (REQ_FLUSH \| REQ_FUA)) {
				1429	thin_defer_bio(tc, bio);
				1430	return DM_MAPIO_SUBMITTED;
				1431	}
				1432
				1433	r = dm_thin_find_block(td, block, 0, &result);
				1434
				1435	/*
				1436	* Note that we defer readahead too.
				1437	*/
				1438	switch (r) {
				1439	case 0:
				1440	if (unlikely(result.shared)) {
				1441	/*
				1442	* We have a race condition here between the
				1443	* result.shared value returned by the lookup and
				1444	* snapshot creation, which may cause new
				1445	* sharing.
				1446	*
				1447	* To avoid this always quiesce the origin before
				1448	* taking the snap. You want to do this anyway to
				1449	* ensure a consistent application view
				1450	* (i.e. lockfs).
				1451	*
				1452	* More distant ancestors are irrelevant. The
				1453	* shared flag will be set in their case.
				1454	*/
				1455	thin_defer_bio(tc, bio);
				1456	r = DM_MAPIO_SUBMITTED;
				1457	} else {
				1458	remap(tc, bio, result.block);
				1459	r = DM_MAPIO_REMAPPED;
				1460	}
				1461	break;
				1462
				1463	case -ENODATA:
				1464	/*
				1465	* In future, the failed dm_thin_find_block above could
				1466	* provide the hint to load the metadata into cache.
				1467	*/
				1468	case -EWOULDBLOCK:
				1469	thin_defer_bio(tc, bio);
				1470	r = DM_MAPIO_SUBMITTED;
				1471	break;
				1472	}
				1473
				1474	return r;
				1475	}
				1476
				1477	static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
				1478	{
				1479	int r;
				1480	unsigned long flags;
				1481	struct pool_c *pt = container_of(cb, struct pool_c, callbacks);
				1482
				1483	spin_lock_irqsave(&pt->pool->lock, flags);
				1484	r = !bio_list_empty(&pt->pool->retry_on_resume_list);
				1485	spin_unlock_irqrestore(&pt->pool->lock, flags);
				1486
				1487	if (!r) {
				1488	struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
				1489	r = bdi_congested(&q->backing_dev_info, bdi_bits);
				1490	}
				1491
				1492	return r;
				1493	}
				1494
				1495	static void __requeue_bios(struct pool *pool)
				1496	{
				1497	bio_list_merge(&pool->deferred_bios, &pool->retry_on_resume_list);
				1498	bio_list_init(&pool->retry_on_resume_list);
				1499	}
				1500
/*----------------------------------------------------------------
 * Binding of control targets to a pool object
 *--------------------------------------------------------------*/
				1504	static int bind_control_target(struct pool pool, struct dm_target ti)
				1505	{
				1506	struct pool_c *pt = ti->private;
				1507
				1508	pool->ti = ti;
				1509	pool->low_water_blocks = pt->low_water_blocks;
				1510	pool->zero_new_blocks = pt->zero_new_blocks;
				1511
				1512	return 0;
				1513	}
				1514
				1515	static void unbind_control_target(struct pool pool, struct dm_target ti)
				1516	{
				1517	if (pool->ti == ti)
				1518	pool->ti = NULL;
				1519	}
				1520
/*----------------------------------------------------------------
 * Pool creation
 *--------------------------------------------------------------*/
				1524	static void __pool_destroy(struct pool *pool)
				1525	{
				1526	__pool_table_remove(pool);
				1527
				1528	if (dm_pool_metadata_close(pool->pmd) < 0)
				1529	DMWARN("%s: dm_pool_metadata_close() failed.", __func__);
				1530
				1531	prison_destroy(pool->prison);
				1532	dm_kcopyd_client_destroy(pool->copier);
				1533
				1534	if (pool->wq)
				1535	destroy_workqueue(pool->wq);
				1536
				1537	if (pool->next_mapping)
				1538	mempool_free(pool->next_mapping, pool->mapping_pool);
				1539	mempool_destroy(pool->mapping_pool);
				1540	mempool_destroy(pool->endio_hook_pool);
				1541	kfree(pool);
				1542	}
				1543
				1544	static struct pool pool_create(struct mapped_device pool_md,
				1545	struct block_device *metadata_dev,
				1546	unsigned long block_size, char **error)
				1547	{
				1548	int r;
				1549	void *err_p;
				1550	struct pool *pool;
				1551	struct dm_pool_metadata *pmd;
				1552
				1553	pmd = dm_pool_metadata_open(metadata_dev, block_size);
				1554	if (IS_ERR(pmd)) {
				1555	*error = "Error creating metadata object";
				1556	return (struct pool *)pmd;
				1557	}
				1558
				1559	pool = kmalloc(sizeof(*pool), GFP_KERNEL);
				1560	if (!pool) {
				1561	*error = "Error allocating memory for pool";
				1562	err_p = ERR_PTR(-ENOMEM);
				1563	goto bad_pool;
				1564	}
				1565
				1566	pool->pmd = pmd;
				1567	pool->sectors_per_block = block_size;
				1568	pool->block_shift = ffs(block_size) - 1;
				1569	pool->offset_mask = block_size - 1;
				1570	pool->low_water_blocks = 0;
				1571	pool->zero_new_blocks = 1;
				1572	pool->prison = prison_create(PRISON_CELLS);
				1573	if (!pool->prison) {
				1574	*error = "Error creating pool's bio prison";
				1575	err_p = ERR_PTR(-ENOMEM);
				1576	goto bad_prison;
				1577	}
				1578
				1579	pool->copier = dm_kcopyd_client_create();
				1580	if (IS_ERR(pool->copier)) {
				1581	r = PTR_ERR(pool->copier);
				1582	*error = "Error creating pool's kcopyd client";
				1583	err_p = ERR_PTR(r);
				1584	goto bad_kcopyd_client;
				1585	}
				1586
				1587	/*
				1588	* Create singlethreaded workqueue that will service all devices
				1589	* that use this metadata.
				1590	*/
				1591	pool->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
				1592	if (!pool->wq) {
				1593	*error = "Error creating pool's workqueue";
				1594	err_p = ERR_PTR(-ENOMEM);
				1595	goto bad_wq;
				1596	}
				1597
				1598	INIT_WORK(&pool->worker, do_worker);
Joe Thornber	905e51b	2012-03-28 18:41:27 +0100	[diff] [blame]	1599	INIT_DELAYED_WORK(&pool->waker, do_waker);
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	1600	spin_lock_init(&pool->lock);
				1601	bio_list_init(&pool->deferred_bios);
				1602	bio_list_init(&pool->deferred_flush_bios);
				1603	INIT_LIST_HEAD(&pool->prepared_mappings);
				1604	pool->low_water_triggered = 0;
				1605	pool->no_free_space = 0;
				1606	bio_list_init(&pool->retry_on_resume_list);
				1607	ds_init(&pool->ds);
				1608
				1609	pool->next_mapping = NULL;
				1610	pool->mapping_pool =
				1611	mempool_create_kmalloc_pool(MAPPING_POOL_SIZE, sizeof(struct new_mapping));
				1612	if (!pool->mapping_pool) {
				1613	*error = "Error creating pool's mapping mempool";
				1614	err_p = ERR_PTR(-ENOMEM);
				1615	goto bad_mapping_pool;
				1616	}
				1617
				1618	pool->endio_hook_pool =
				1619	mempool_create_kmalloc_pool(ENDIO_HOOK_POOL_SIZE, sizeof(struct endio_hook));
				1620	if (!pool->endio_hook_pool) {
				1621	*error = "Error creating pool's endio_hook mempool";
				1622	err_p = ERR_PTR(-ENOMEM);
				1623	goto bad_endio_hook_pool;
				1624	}
				1625	pool->ref_count = 1;
Joe Thornber	905e51b	2012-03-28 18:41:27 +0100	[diff] [blame]	1626	pool->last_commit_jiffies = jiffies;
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	1627	pool->pool_md = pool_md;
				1628	pool->md_dev = metadata_dev;
				1629	__pool_table_insert(pool);
				1630
				1631	return pool;
				1632
				1633	bad_endio_hook_pool:
				1634	mempool_destroy(pool->mapping_pool);
				1635	bad_mapping_pool:
				1636	destroy_workqueue(pool->wq);
				1637	bad_wq:
				1638	dm_kcopyd_client_destroy(pool->copier);
				1639	bad_kcopyd_client:
				1640	prison_destroy(pool->prison);
				1641	bad_prison:
				1642	kfree(pool);
				1643	bad_pool:
				1644	if (dm_pool_metadata_close(pmd))
				1645	DMWARN("%s: dm_pool_metadata_close() failed.", __func__);
				1646
				1647	return err_p;
				1648	}
				1649
				1650	static void __pool_inc(struct pool *pool)
				1651	{
				1652	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
				1653	pool->ref_count++;
				1654	}
				1655
				1656	static void __pool_dec(struct pool *pool)
				1657	{
				1658	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
				1659	BUG_ON(!pool->ref_count);
				1660	if (!--pool->ref_count)
				1661	__pool_destroy(pool);
				1662	}
				1663
				1664	static struct pool __pool_find(struct mapped_device pool_md,
				1665	struct block_device *metadata_dev,
				1666	unsigned long block_size, char **error)
				1667	{
				1668	struct pool *pool = __pool_table_lookup_metadata_dev(metadata_dev);
				1669
				1670	if (pool) {
				1671	if (pool->pool_md != pool_md)
				1672	return ERR_PTR(-EBUSY);
				1673	__pool_inc(pool);
				1674
				1675	} else {
				1676	pool = __pool_table_lookup(pool_md);
				1677	if (pool) {
				1678	if (pool->md_dev != metadata_dev)
				1679	return ERR_PTR(-EINVAL);
				1680	__pool_inc(pool);
				1681
				1682	} else
				1683	pool = pool_create(pool_md, metadata_dev, block_size, error);
				1684	}
				1685
				1686	return pool;
				1687	}
				1688
/*----------------------------------------------------------------
 * Pool target methods
 *--------------------------------------------------------------*/
				1692	static void pool_dtr(struct dm_target *ti)
				1693	{
				1694	struct pool_c *pt = ti->private;
				1695
				1696	mutex_lock(&dm_thin_pool_table.mutex);
				1697
				1698	unbind_control_target(pt->pool, ti);
				1699	__pool_dec(pt->pool);
				1700	dm_put_device(ti, pt->metadata_dev);
				1701	dm_put_device(ti, pt->data_dev);
				1702	kfree(pt);
				1703
				1704	mutex_unlock(&dm_thin_pool_table.mutex);
				1705	}
				1706
				1707	struct pool_features {
				1708	unsigned zero_new_blocks:1;
				1709	};
				1710
				1711	static int parse_pool_features(struct dm_arg_set as, struct pool_features pf,
				1712	struct dm_target *ti)
				1713	{
				1714	int r;
				1715	unsigned argc;
				1716	const char *arg_name;
				1717
				1718	static struct dm_arg _args[] = {
				1719	{0, 1, "Invalid number of pool feature arguments"},
				1720	};
				1721
				1722	/*
				1723	* No feature arguments supplied.
				1724	*/
				1725	if (!as->argc)
				1726	return 0;
				1727
				1728	r = dm_read_arg_group(_args, as, &argc, &ti->error);
				1729	if (r)
				1730	return -EINVAL;
				1731
				1732	while (argc && !r) {
				1733	arg_name = dm_shift_arg(as);
				1734	argc--;
				1735
				1736	if (!strcasecmp(arg_name, "skip_block_zeroing")) {
				1737	pf->zero_new_blocks = 0;
				1738	continue;
				1739	}
				1740
				1741	ti->error = "Unrecognised pool feature requested";
				1742	r = -EINVAL;
				1743	}
				1744
				1745	return r;
				1746	}
				1747
				1748	/*
				1749	* thin-pool <metadata dev> <data dev>
				1750	* <data block size (sectors)>
				1751	* <low water mark (blocks)>
				1752	* [<#feature args> [<arg>]*]
				1753	*
				1754	* Optional feature arguments are:
				1755	* skip_block_zeroing: skips the zeroing of newly-provisioned blocks.
				1756	*/
				1757	static int pool_ctr(struct dm_target ti, unsigned argc, char *argv)
				1758	{
				1759	int r;
				1760	struct pool_c *pt;
				1761	struct pool *pool;
				1762	struct pool_features pf;
				1763	struct dm_arg_set as;
				1764	struct dm_dev *data_dev;
				1765	unsigned long block_size;
				1766	dm_block_t low_water_blocks;
				1767	struct dm_dev *metadata_dev;
				1768	sector_t metadata_dev_size;
Mike Snitzer	c4a69ec	2012-03-28 18:41:28 +0100	[diff] [blame]	1769	char b[BDEVNAME_SIZE];
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	1770
				1771	/*
				1772	* FIXME Remove validation from scope of lock.
				1773	*/
				1774	mutex_lock(&dm_thin_pool_table.mutex);
				1775
				1776	if (argc < 4) {
				1777	ti->error = "Invalid argument count";
				1778	r = -EINVAL;
				1779	goto out_unlock;
				1780	}
				1781	as.argc = argc;
				1782	as.argv = argv;
				1783
				1784	r = dm_get_device(ti, argv[0], FMODE_READ \| FMODE_WRITE, &metadata_dev);
				1785	if (r) {
				1786	ti->error = "Error opening metadata block device";
				1787	goto out_unlock;
				1788	}
				1789
				1790	metadata_dev_size = i_size_read(metadata_dev->bdev->bd_inode) >> SECTOR_SHIFT;
Mike Snitzer	c4a69ec	2012-03-28 18:41:28 +0100	[diff] [blame]	1791	if (metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING)
				1792	DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
				1793	bdevname(metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS);
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	1794
				1795	r = dm_get_device(ti, argv[1], FMODE_READ \| FMODE_WRITE, &data_dev);
				1796	if (r) {
				1797	ti->error = "Error getting data device";
				1798	goto out_metadata;
				1799	}
				1800
				1801	if (kstrtoul(argv[2], 10, &block_size) \|\| !block_size \|\|
				1802	block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS \|\|
				1803	block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS \|\|
				1804	!is_power_of_2(block_size)) {
				1805	ti->error = "Invalid block size";
				1806	r = -EINVAL;
				1807	goto out;
				1808	}
				1809
				1810	if (kstrtoull(argv[3], 10, (unsigned long long *)&low_water_blocks)) {
				1811	ti->error = "Invalid low water mark";
				1812	r = -EINVAL;
				1813	goto out;
				1814	}
				1815
				1816	/*
				1817	* Set default pool features.
				1818	*/
				1819	memset(&pf, 0, sizeof(pf));
				1820	pf.zero_new_blocks = 1;
				1821
				1822	dm_consume_args(&as, 4);
				1823	r = parse_pool_features(&as, &pf, ti);
				1824	if (r)
				1825	goto out;
				1826
				1827	pt = kzalloc(sizeof(*pt), GFP_KERNEL);
				1828	if (!pt) {
				1829	r = -ENOMEM;
				1830	goto out;
				1831	}
				1832
				1833	pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev,
				1834	block_size, &ti->error);
				1835	if (IS_ERR(pool)) {
				1836	r = PTR_ERR(pool);
				1837	goto out_free_pt;
				1838	}
				1839
				1840	pt->pool = pool;
				1841	pt->ti = ti;
				1842	pt->metadata_dev = metadata_dev;
				1843	pt->data_dev = data_dev;
				1844	pt->low_water_blocks = low_water_blocks;
				1845	pt->zero_new_blocks = pf.zero_new_blocks;
				1846	ti->num_flush_requests = 1;
				1847	ti->num_discard_requests = 0;
				1848	ti->private = pt;
				1849
				1850	pt->callbacks.congested_fn = pool_is_congested;
				1851	dm_table_add_target_callbacks(ti->table, &pt->callbacks);
				1852
				1853	mutex_unlock(&dm_thin_pool_table.mutex);
				1854
				1855	return 0;
				1856
				1857	out_free_pt:
				1858	kfree(pt);
				1859	out:
				1860	dm_put_device(ti, data_dev);
				1861	out_metadata:
				1862	dm_put_device(ti, metadata_dev);
				1863	out_unlock:
				1864	mutex_unlock(&dm_thin_pool_table.mutex);
				1865
				1866	return r;
				1867	}
				1868
				1869	static int pool_map(struct dm_target ti, struct bio bio,
				1870	union map_info *map_context)
				1871	{
				1872	int r;
				1873	struct pool_c *pt = ti->private;
				1874	struct pool *pool = pt->pool;
				1875	unsigned long flags;
				1876
				1877	/*
				1878	* As this is a singleton target, ti->begin is always zero.
				1879	*/
				1880	spin_lock_irqsave(&pool->lock, flags);
				1881	bio->bi_bdev = pt->data_dev->bdev;
				1882	r = DM_MAPIO_REMAPPED;
				1883	spin_unlock_irqrestore(&pool->lock, flags);
				1884
				1885	return r;
				1886	}
				1887
				1888	/*
				1889	* Retrieves the number of blocks of the data device from
				1890	* the superblock and compares it to the actual device size,
				1891	* thus resizing the data device in case it has grown.
				1892	*
				1893	* This both copes with opening preallocated data devices in the ctr
				1894	* being followed by a resume
				1895	* -and-
				1896	* calling the resume method individually after userspace has
				1897	* grown the data device in reaction to a table event.
				1898	*/
				1899	static int pool_preresume(struct dm_target *ti)
				1900	{
				1901	int r;
				1902	struct pool_c *pt = ti->private;
				1903	struct pool *pool = pt->pool;
				1904	dm_block_t data_size, sb_data_size;
				1905
				1906	/*
				1907	* Take control of the pool object.
				1908	*/
				1909	r = bind_control_target(pool, ti);
				1910	if (r)
				1911	return r;
				1912
				1913	data_size = ti->len >> pool->block_shift;
				1914	r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size);
				1915	if (r) {
				1916	DMERR("failed to retrieve data device size");
				1917	return r;
				1918	}
				1919
				1920	if (data_size < sb_data_size) {
				1921	DMERR("pool target too small, is %llu blocks (expected %llu)",
				1922	data_size, sb_data_size);
				1923	return -EINVAL;
				1924
				1925	} else if (data_size > sb_data_size) {
				1926	r = dm_pool_resize_data_dev(pool->pmd, data_size);
				1927	if (r) {
				1928	DMERR("failed to resize data device");
				1929	return r;
				1930	}
				1931
				1932	r = dm_pool_commit_metadata(pool->pmd);
				1933	if (r) {
				1934	DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
				1935	__func__, r);
				1936	return r;
				1937	}
				1938	}
				1939
				1940	return 0;
				1941	}
				1942
				1943	static void pool_resume(struct dm_target *ti)
				1944	{
				1945	struct pool_c *pt = ti->private;
				1946	struct pool *pool = pt->pool;
				1947	unsigned long flags;
				1948
				1949	spin_lock_irqsave(&pool->lock, flags);
				1950	pool->low_water_triggered = 0;
				1951	pool->no_free_space = 0;
				1952	__requeue_bios(pool);
				1953	spin_unlock_irqrestore(&pool->lock, flags);
				1954
Joe Thornber	905e51b	2012-03-28 18:41:27 +0100	[diff] [blame]	1955	do_waker(&pool->waker.work);
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	1956	}
				1957
				1958	static void pool_postsuspend(struct dm_target *ti)
				1959	{
				1960	int r;
				1961	struct pool_c *pt = ti->private;
				1962	struct pool *pool = pt->pool;
				1963
Joe Thornber	905e51b	2012-03-28 18:41:27 +0100	[diff] [blame]	1964	cancel_delayed_work(&pool->waker);
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	1965	flush_workqueue(pool->wq);
				1966
				1967	r = dm_pool_commit_metadata(pool->pmd);
				1968	if (r < 0) {
				1969	DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
				1970	__func__, r);
				1971	/* FIXME: invalidate device? error the next FUA or FLUSH bio ?*/
				1972	}
				1973	}
				1974
				1975	static int check_arg_count(unsigned argc, unsigned args_required)
				1976	{
				1977	if (argc != args_required) {
				1978	DMWARN("Message received with %u arguments instead of %u.",
				1979	argc, args_required);
				1980	return -EINVAL;
				1981	}
				1982
				1983	return 0;
				1984	}
				1985
				1986	static int read_dev_id(char arg, dm_thin_id dev_id, int warning)
				1987	{
				1988	if (!kstrtoull(arg, 10, (unsigned long long *)dev_id) &&
				1989	*dev_id <= MAX_DEV_ID)
				1990	return 0;
				1991
				1992	if (warning)
				1993	DMWARN("Message received with invalid device id: %s", arg);
				1994
				1995	return -EINVAL;
				1996	}
				1997
				1998	static int process_create_thin_mesg(unsigned argc, char *argv, struct pool pool)
				1999	{
				2000	dm_thin_id dev_id;
				2001	int r;
				2002
				2003	r = check_arg_count(argc, 2);
				2004	if (r)
				2005	return r;
				2006
				2007	r = read_dev_id(argv[1], &dev_id, 1);
				2008	if (r)
				2009	return r;
				2010
				2011	r = dm_pool_create_thin(pool->pmd, dev_id);
				2012	if (r) {
				2013	DMWARN("Creation of new thinly-provisioned device with id %s failed.",
				2014	argv[1]);
				2015	return r;
				2016	}
				2017
				2018	return 0;
				2019	}
				2020
				2021	static int process_create_snap_mesg(unsigned argc, char *argv, struct pool pool)
				2022	{
				2023	dm_thin_id dev_id;
				2024	dm_thin_id origin_dev_id;
				2025	int r;
				2026
				2027	r = check_arg_count(argc, 3);
				2028	if (r)
				2029	return r;
				2030
				2031	r = read_dev_id(argv[1], &dev_id, 1);
				2032	if (r)
				2033	return r;
				2034
				2035	r = read_dev_id(argv[2], &origin_dev_id, 1);
				2036	if (r)
				2037	return r;
				2038
				2039	r = dm_pool_create_snap(pool->pmd, dev_id, origin_dev_id);
				2040	if (r) {
				2041	DMWARN("Creation of new snapshot %s of device %s failed.",
				2042	argv[1], argv[2]);
				2043	return r;
				2044	}
				2045
				2046	return 0;
				2047	}
				2048
				2049	static int process_delete_mesg(unsigned argc, char *argv, struct pool pool)
				2050	{
				2051	dm_thin_id dev_id;
				2052	int r;
				2053
				2054	r = check_arg_count(argc, 2);
				2055	if (r)
				2056	return r;
				2057
				2058	r = read_dev_id(argv[1], &dev_id, 1);
				2059	if (r)
				2060	return r;
				2061
				2062	r = dm_pool_delete_thin_device(pool->pmd, dev_id);
				2063	if (r)
				2064	DMWARN("Deletion of thin device %s failed.", argv[1]);
				2065
				2066	return r;
				2067	}
				2068
				2069	static int process_set_transaction_id_mesg(unsigned argc, char *argv, struct pool pool)
				2070	{
				2071	dm_thin_id old_id, new_id;
				2072	int r;
				2073
				2074	r = check_arg_count(argc, 3);
				2075	if (r)
				2076	return r;
				2077
				2078	if (kstrtoull(argv[1], 10, (unsigned long long *)&old_id)) {
				2079	DMWARN("set_transaction_id message: Unrecognised id %s.", argv[1]);
				2080	return -EINVAL;
				2081	}
				2082
				2083	if (kstrtoull(argv[2], 10, (unsigned long long *)&new_id)) {
				2084	DMWARN("set_transaction_id message: Unrecognised new id %s.", argv[2]);
				2085	return -EINVAL;
				2086	}
				2087
				2088	r = dm_pool_set_metadata_transaction_id(pool->pmd, old_id, new_id);
				2089	if (r) {
				2090	DMWARN("Failed to change transaction id from %s to %s.",
				2091	argv[1], argv[2]);
				2092	return r;
				2093	}
				2094
				2095	return 0;
				2096	}
				2097
				2098	/*
				2099	* Messages supported:
				2100	* create_thin <dev_id>
				2101	* create_snap <dev_id> <origin_id>
				2102	* delete <dev_id>
				2103	* trim <dev_id> <new_size_in_sectors>
				2104	* set_transaction_id <current_trans_id> <new_trans_id>
				2105	*/
				2106	static int pool_message(struct dm_target ti, unsigned argc, char *argv)
				2107	{
				2108	int r = -EINVAL;
				2109	struct pool_c *pt = ti->private;
				2110	struct pool *pool = pt->pool;
				2111
				2112	if (!strcasecmp(argv[0], "create_thin"))
				2113	r = process_create_thin_mesg(argc, argv, pool);
				2114
				2115	else if (!strcasecmp(argv[0], "create_snap"))
				2116	r = process_create_snap_mesg(argc, argv, pool);
				2117
				2118	else if (!strcasecmp(argv[0], "delete"))
				2119	r = process_delete_mesg(argc, argv, pool);
				2120
				2121	else if (!strcasecmp(argv[0], "set_transaction_id"))
				2122	r = process_set_transaction_id_mesg(argc, argv, pool);
				2123
				2124	else
				2125	DMWARN("Unrecognised thin pool target message received: %s", argv[0]);
				2126
				2127	if (!r) {
				2128	r = dm_pool_commit_metadata(pool->pmd);
				2129	if (r)
				2130	DMERR("%s message: dm_pool_commit_metadata() failed, error = %d",
				2131	argv[0], r);
				2132	}
				2133
				2134	return r;
				2135	}
				2136
				2137	/*
				2138	* Status line is:
				2139	* <transaction id> <used metadata sectors>/<total metadata sectors>
				2140	* <used data sectors>/<total data sectors> <held metadata root>
				2141	*/
				2142	static int pool_status(struct dm_target *ti, status_type_t type,
				2143	char *result, unsigned maxlen)
				2144	{
				2145	int r;
				2146	unsigned sz = 0;
				2147	uint64_t transaction_id;
				2148	dm_block_t nr_free_blocks_data;
				2149	dm_block_t nr_free_blocks_metadata;
				2150	dm_block_t nr_blocks_data;
				2151	dm_block_t nr_blocks_metadata;
				2152	dm_block_t held_root;
				2153	char buf[BDEVNAME_SIZE];
				2154	char buf2[BDEVNAME_SIZE];
				2155	struct pool_c *pt = ti->private;
				2156	struct pool *pool = pt->pool;
				2157
				2158	switch (type) {
				2159	case STATUSTYPE_INFO:
				2160	r = dm_pool_get_metadata_transaction_id(pool->pmd,
				2161	&transaction_id);
				2162	if (r)
				2163	return r;
				2164
				2165	r = dm_pool_get_free_metadata_block_count(pool->pmd,
				2166	&nr_free_blocks_metadata);
				2167	if (r)
				2168	return r;
				2169
				2170	r = dm_pool_get_metadata_dev_size(pool->pmd, &nr_blocks_metadata);
				2171	if (r)
				2172	return r;
				2173
				2174	r = dm_pool_get_free_block_count(pool->pmd,
				2175	&nr_free_blocks_data);
				2176	if (r)
				2177	return r;
				2178
				2179	r = dm_pool_get_data_dev_size(pool->pmd, &nr_blocks_data);
				2180	if (r)
				2181	return r;
				2182
				2183	r = dm_pool_get_held_metadata_root(pool->pmd, &held_root);
				2184	if (r)
				2185	return r;
				2186
				2187	DMEMIT("%llu %llu/%llu %llu/%llu ",
				2188	(unsigned long long)transaction_id,
				2189	(unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
				2190	(unsigned long long)nr_blocks_metadata,
				2191	(unsigned long long)(nr_blocks_data - nr_free_blocks_data),
				2192	(unsigned long long)nr_blocks_data);
				2193
				2194	if (held_root)
				2195	DMEMIT("%llu", held_root);
				2196	else
				2197	DMEMIT("-");
				2198
				2199	break;
				2200
				2201	case STATUSTYPE_TABLE:
				2202	DMEMIT("%s %s %lu %llu ",
				2203	format_dev_t(buf, pt->metadata_dev->bdev->bd_dev),
				2204	format_dev_t(buf2, pt->data_dev->bdev->bd_dev),
				2205	(unsigned long)pool->sectors_per_block,
				2206	(unsigned long long)pt->low_water_blocks);
				2207
				2208	DMEMIT("%u ", !pool->zero_new_blocks);
				2209
				2210	if (!pool->zero_new_blocks)
				2211	DMEMIT("skip_block_zeroing ");
				2212	break;
				2213	}
				2214
				2215	return 0;
				2216	}
				2217
				2218	static int pool_iterate_devices(struct dm_target *ti,
				2219	iterate_devices_callout_fn fn, void *data)
				2220	{
				2221	struct pool_c *pt = ti->private;
				2222
				2223	return fn(ti, pt->data_dev, 0, ti->len, data);
				2224	}
				2225
				2226	static int pool_merge(struct dm_target ti, struct bvec_merge_data bvm,
				2227	struct bio_vec *biovec, int max_size)
				2228	{
				2229	struct pool_c *pt = ti->private;
				2230	struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
				2231
				2232	if (!q->merge_bvec_fn)
				2233	return max_size;
				2234
				2235	bvm->bi_bdev = pt->data_dev->bdev;
				2236
				2237	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
				2238	}
				2239
				2240	static void pool_io_hints(struct dm_target ti, struct queue_limits limits)
				2241	{
				2242	struct pool_c *pt = ti->private;
				2243	struct pool *pool = pt->pool;
				2244
				2245	blk_limits_io_min(limits, 0);
				2246	blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
				2247	}
				2248
				2249	static struct target_type pool_target = {
				2250	.name = "thin-pool",
				2251	.features = DM_TARGET_SINGLETON \| DM_TARGET_ALWAYS_WRITEABLE \|
				2252	DM_TARGET_IMMUTABLE,
				2253	.version = {1, 0, 0},
				2254	.module = THIS_MODULE,
				2255	.ctr = pool_ctr,
				2256	.dtr = pool_dtr,
				2257	.map = pool_map,
				2258	.postsuspend = pool_postsuspend,
				2259	.preresume = pool_preresume,
				2260	.resume = pool_resume,
				2261	.message = pool_message,
				2262	.status = pool_status,
				2263	.merge = pool_merge,
				2264	.iterate_devices = pool_iterate_devices,
				2265	.io_hints = pool_io_hints,
				2266	};
				2267
/*----------------------------------------------------------------
 * Thin target methods
 *--------------------------------------------------------------*/
				2271	static void thin_dtr(struct dm_target *ti)
				2272	{
				2273	struct thin_c *tc = ti->private;
				2274
				2275	mutex_lock(&dm_thin_pool_table.mutex);
				2276
				2277	__pool_dec(tc->pool);
				2278	dm_pool_close_thin_device(tc->td);
				2279	dm_put_device(ti, tc->pool_dev);
Joe Thornber	2dd9c25	2012-03-28 18:41:28 +0100	[diff] [blame]	2280	if (tc->origin_dev)
				2281	dm_put_device(ti, tc->origin_dev);
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	2282	kfree(tc);
				2283
				2284	mutex_unlock(&dm_thin_pool_table.mutex);
				2285	}
				2286
				2287	/*
				2288	* Thin target parameters:
				2289	*
Joe Thornber	2dd9c25	2012-03-28 18:41:28 +0100	[diff] [blame]	2290	* <pool_dev> <dev_id> [origin_dev]
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	2291	*
				2292	* pool_dev: the path to the pool (eg, /dev/mapper/my_pool)
				2293	* dev_id: the internal device identifier
Joe Thornber	2dd9c25	2012-03-28 18:41:28 +0100	[diff] [blame]	2294	* origin_dev: a device external to the pool that should act as the origin
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	2295	*/
				2296	static int thin_ctr(struct dm_target ti, unsigned argc, char *argv)
				2297	{
				2298	int r;
				2299	struct thin_c *tc;
Joe Thornber	2dd9c25	2012-03-28 18:41:28 +0100	[diff] [blame]	2300	struct dm_dev pool_dev, origin_dev;
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	2301	struct mapped_device *pool_md;
				2302
				2303	mutex_lock(&dm_thin_pool_table.mutex);
				2304
Joe Thornber	2dd9c25	2012-03-28 18:41:28 +0100	[diff] [blame]	2305	if (argc != 2 && argc != 3) {
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	2306	ti->error = "Invalid argument count";
				2307	r = -EINVAL;
				2308	goto out_unlock;
				2309	}
				2310
				2311	tc = ti->private = kzalloc(sizeof(*tc), GFP_KERNEL);
				2312	if (!tc) {
				2313	ti->error = "Out of memory";
				2314	r = -ENOMEM;
				2315	goto out_unlock;
				2316	}
				2317
Joe Thornber	2dd9c25	2012-03-28 18:41:28 +0100	[diff] [blame]	2318	if (argc == 3) {
				2319	r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev);
				2320	if (r) {
				2321	ti->error = "Error opening origin device";
				2322	goto bad_origin_dev;
				2323	}
				2324	tc->origin_dev = origin_dev;
				2325	}
				2326
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	2327	r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &pool_dev);
				2328	if (r) {
				2329	ti->error = "Error opening pool device";
				2330	goto bad_pool_dev;
				2331	}
				2332	tc->pool_dev = pool_dev;
				2333
				2334	if (read_dev_id(argv[1], (unsigned long long *)&tc->dev_id, 0)) {
				2335	ti->error = "Invalid device id";
				2336	r = -EINVAL;
				2337	goto bad_common;
				2338	}
				2339
				2340	pool_md = dm_get_md(tc->pool_dev->bdev->bd_dev);
				2341	if (!pool_md) {
				2342	ti->error = "Couldn't get pool mapped device";
				2343	r = -EINVAL;
				2344	goto bad_common;
				2345	}
				2346
				2347	tc->pool = __pool_table_lookup(pool_md);
				2348	if (!tc->pool) {
				2349	ti->error = "Couldn't find pool object";
				2350	r = -EINVAL;
				2351	goto bad_pool_lookup;
				2352	}
				2353	__pool_inc(tc->pool);
				2354
				2355	r = dm_pool_open_thin_device(tc->pool->pmd, tc->dev_id, &tc->td);
				2356	if (r) {
				2357	ti->error = "Couldn't open thin internal device";
				2358	goto bad_thin_open;
				2359	}
				2360
				2361	ti->split_io = tc->pool->sectors_per_block;
				2362	ti->num_flush_requests = 1;
				2363	ti->num_discard_requests = 0;
				2364	ti->discards_supported = 0;
				2365
				2366	dm_put(pool_md);
				2367
				2368	mutex_unlock(&dm_thin_pool_table.mutex);
				2369
				2370	return 0;
				2371
				2372	bad_thin_open:
				2373	__pool_dec(tc->pool);
				2374	bad_pool_lookup:
				2375	dm_put(pool_md);
				2376	bad_common:
				2377	dm_put_device(ti, tc->pool_dev);
				2378	bad_pool_dev:
Joe Thornber	2dd9c25	2012-03-28 18:41:28 +0100	[diff] [blame]	2379	if (tc->origin_dev)
				2380	dm_put_device(ti, tc->origin_dev);
				2381	bad_origin_dev:
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	2382	kfree(tc);
				2383	out_unlock:
				2384	mutex_unlock(&dm_thin_pool_table.mutex);
				2385
				2386	return r;
				2387	}
				2388
				2389	static int thin_map(struct dm_target ti, struct bio bio,
				2390	union map_info *map_context)
				2391	{
Alasdair G Kergon	6efd6e8	2012-03-28 18:41:28 +0100	[diff] [blame^]	2392	bio->bi_sector = dm_target_offset(ti, bio->bi_sector);
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	2393
				2394	return thin_bio_map(ti, bio, map_context);
				2395	}
				2396
				2397	static void thin_postsuspend(struct dm_target *ti)
				2398	{
				2399	if (dm_noflush_suspending(ti))
				2400	requeue_io((struct thin_c *)ti->private);
				2401	}
				2402
				2403	/*
				2404	* <nr mapped sectors> <highest mapped sector>
				2405	*/
				2406	static int thin_status(struct dm_target *ti, status_type_t type,
				2407	char *result, unsigned maxlen)
				2408	{
				2409	int r;
				2410	ssize_t sz = 0;
				2411	dm_block_t mapped, highest;
				2412	char buf[BDEVNAME_SIZE];
				2413	struct thin_c *tc = ti->private;
				2414
				2415	if (!tc->td)
				2416	DMEMIT("-");
				2417	else {
				2418	switch (type) {
				2419	case STATUSTYPE_INFO:
				2420	r = dm_thin_get_mapped_count(tc->td, &mapped);
				2421	if (r)
				2422	return r;
				2423
				2424	r = dm_thin_get_highest_mapped_block(tc->td, &highest);
				2425	if (r < 0)
				2426	return r;
				2427
				2428	DMEMIT("%llu ", mapped * tc->pool->sectors_per_block);
				2429	if (r)
				2430	DMEMIT("%llu", ((highest + 1) *
				2431	tc->pool->sectors_per_block) - 1);
				2432	else
				2433	DMEMIT("-");
				2434	break;
				2435
				2436	case STATUSTYPE_TABLE:
				2437	DMEMIT("%s %lu",
				2438	format_dev_t(buf, tc->pool_dev->bdev->bd_dev),
				2439	(unsigned long) tc->dev_id);
Joe Thornber	2dd9c25	2012-03-28 18:41:28 +0100	[diff] [blame]	2440	if (tc->origin_dev)
				2441	DMEMIT(" %s", format_dev_t(buf, tc->origin_dev->bdev->bd_dev));
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	2442	break;
				2443	}
				2444	}
				2445
				2446	return 0;
				2447	}
				2448
				2449	static int thin_iterate_devices(struct dm_target *ti,
				2450	iterate_devices_callout_fn fn, void *data)
				2451	{
				2452	dm_block_t blocks;
				2453	struct thin_c *tc = ti->private;
				2454
				2455	/*
				2456	* We can't call dm_pool_get_data_dev_size() since that blocks. So
				2457	* we follow a more convoluted path through to the pool's target.
				2458	*/
				2459	if (!tc->pool->ti)
				2460	return 0; /* nothing is bound */
				2461
				2462	blocks = tc->pool->ti->len >> tc->pool->block_shift;
				2463	if (blocks)
				2464	return fn(ti, tc->pool_dev, 0, tc->pool->sectors_per_block * blocks, data);
				2465
				2466	return 0;
				2467	}
				2468
				2469	static void thin_io_hints(struct dm_target ti, struct queue_limits limits)
				2470	{
				2471	struct thin_c *tc = ti->private;
				2472
				2473	blk_limits_io_min(limits, 0);
				2474	blk_limits_io_opt(limits, tc->pool->sectors_per_block << SECTOR_SHIFT);
				2475	}
				2476
				2477	static struct target_type thin_target = {
				2478	.name = "thin",
Joe Thornber	2dd9c25	2012-03-28 18:41:28 +0100	[diff] [blame]	2479	.version = {1, 1, 0},
Joe Thornber	991d9fa	2011-10-31 20:21:18 +0000	[diff] [blame]	2480	.module = THIS_MODULE,
				2481	.ctr = thin_ctr,
				2482	.dtr = thin_dtr,
				2483	.map = thin_map,
				2484	.postsuspend = thin_postsuspend,
				2485	.status = thin_status,
				2486	.iterate_devices = thin_iterate_devices,
				2487	.io_hints = thin_io_hints,
				2488	};
				2489
/*----------------------------------------------------------------*/
				2491
				2492	static int __init dm_thin_init(void)
				2493	{
				2494	int r;
				2495
				2496	pool_table_init();
				2497
				2498	r = dm_register_target(&thin_target);
				2499	if (r)
				2500	return r;
				2501
				2502	r = dm_register_target(&pool_target);
				2503	if (r)
				2504	dm_unregister_target(&thin_target);
				2505
				2506	return r;
				2507	}
				2508
				2509	static void dm_thin_exit(void)
				2510	{
				2511	dm_unregister_target(&thin_target);
				2512	dm_unregister_target(&pool_target);
				2513	}
				2514
				2515	module_init(dm_thin_init);
				2516	module_exit(dm_thin_exit);
				2517
				2518	MODULE_DESCRIPTION(DM_NAME "device-mapper thin provisioning target");
				2519	MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
				2520	MODULE_LICENSE("GPL");