/*
 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-uevent.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/moduleparam.h>
#include <linux/blkpg.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/hdreg.h>

#include <trace/events/block.h>

#define DM_MSG_PREFIX "core"

/*
 * Cookies are numeric values sent with CHANGE and REMOVE
 * uevents while resuming, removing or renaming the device.
 */
#define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE"
#define DM_COOKIE_LENGTH 24

static const char *_name = DM_NAME;

static unsigned int major = 0;
static unsigned int _major = 0;

static DEFINE_SPINLOCK(_minor_lock);
/*
 * For bio-based dm.
 * One of these is allocated per bio.
 */
struct dm_io {
	struct mapped_device *md;
	int error;
	atomic_t io_count;
	struct bio *bio;
	unsigned long start_time;
};

/*
 * For bio-based dm.
 * One of these is allocated per target within a bio. Hopefully
 * this will be simplified out one day.
 */
struct dm_target_io {
	struct dm_io *io;
	struct dm_target *ti;
	union map_info info;
};

/*
 * For request-based dm.
 * One of these is allocated per request.
 */
struct dm_rq_target_io {
	struct mapped_device *md;
	struct dm_target *ti;
	struct request *orig, clone;
	int error;
	union map_info info;
};

/*
 * For request-based dm.
 * One of these is allocated per bio.
 */
struct dm_rq_clone_bio_info {
	struct bio *orig;
	struct request *rq;
};

union map_info *dm_get_mapinfo(struct bio *bio)
{
	if (bio && bio->bi_private)
		return &((struct dm_target_io *)bio->bi_private)->info;
	return NULL;
}

#define MINOR_ALLOCED ((void *)-1)

/*
 * Bits for the md->flags field.
 */
#define DMF_BLOCK_IO_FOR_SUSPEND 0
#define DMF_SUSPENDED 1
#define DMF_FROZEN 2
#define DMF_FREEING 3
#define DMF_DELETING 4
#define DMF_NOFLUSH_SUSPENDING 5
#define DMF_QUEUE_IO_TO_THREAD 6

/*
 * Work processed by per-device workqueue.
 */
struct mapped_device {
	struct rw_semaphore io_lock;
	struct mutex suspend_lock;
	rwlock_t map_lock;
	atomic_t holders;
	atomic_t open_count;

	unsigned long flags;

	struct request_queue *queue;
	struct gendisk *disk;
	char name[16];

	void *interface_ptr;

	/*
	 * A list of ios that arrived while we were suspended.
	 */
	atomic_t pending;
	wait_queue_head_t wait;
	struct work_struct work;
	struct bio_list deferred;
	spinlock_t deferred_lock;

	/*
	 * An error from the barrier request currently being processed.
	 */
	int barrier_error;

	/*
	 * Processing queue (flush/barriers)
	 */
	struct workqueue_struct *wq;

	/*
	 * The current mapping.
	 */
	struct dm_table *map;

	/*
	 * io objects are allocated from here.
	 */
	mempool_t *io_pool;
	mempool_t *tio_pool;

	struct bio_set *bs;

	/*
	 * Event handling.
	 */
	atomic_t event_nr;
	wait_queue_head_t eventq;
	atomic_t uevent_seq;
	struct list_head uevent_list;
	spinlock_t uevent_lock; /* Protect access to uevent_list */

	/*
	 * freeze/thaw support requires holding onto a super block
	 */
	struct super_block *frozen_sb;
	struct block_device *bdev;

	/* forced geometry settings */
	struct hd_geometry geometry;

	/* sysfs handle */
	struct kobject kobj;

	/* zero-length barrier that will be cloned and submitted to targets */
	struct bio barrier_bio;
};

#define MIN_IOS 256
static struct kmem_cache *_io_cache;
static struct kmem_cache *_tio_cache;
static struct kmem_cache *_rq_tio_cache;
static struct kmem_cache *_rq_bio_info_cache;

static int __init local_init(void)
{
	int r = -ENOMEM;

	/* allocate a slab for the dm_ios */
	_io_cache = KMEM_CACHE(dm_io, 0);
	if (!_io_cache)
		return r;

	/* allocate a slab for the target ios */
	_tio_cache = KMEM_CACHE(dm_target_io, 0);
	if (!_tio_cache)
		goto out_free_io_cache;

	_rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0);
	if (!_rq_tio_cache)
		goto out_free_tio_cache;

	_rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0);
	if (!_rq_bio_info_cache)
		goto out_free_rq_tio_cache;

	r = dm_uevent_init();
	if (r)
		goto out_free_rq_bio_info_cache;

	_major = major;
	r = register_blkdev(_major, _name);
	if (r < 0)
		goto out_uevent_exit;

	if (!_major)
		_major = r;

	return 0;

out_uevent_exit:
	dm_uevent_exit();
out_free_rq_bio_info_cache:
	kmem_cache_destroy(_rq_bio_info_cache);
out_free_rq_tio_cache:
	kmem_cache_destroy(_rq_tio_cache);
out_free_tio_cache:
	kmem_cache_destroy(_tio_cache);
out_free_io_cache:
	kmem_cache_destroy(_io_cache);

	return r;
}

static void local_exit(void)
{
	kmem_cache_destroy(_rq_bio_info_cache);
	kmem_cache_destroy(_rq_tio_cache);
	kmem_cache_destroy(_tio_cache);
	kmem_cache_destroy(_io_cache);
	unregister_blkdev(_major, _name);
	dm_uevent_exit();

	_major = 0;

	DMINFO("cleaned up");
}

static int (*_inits[])(void) __initdata = {
	local_init,
	dm_target_init,
	dm_linear_init,
	dm_stripe_init,
	dm_kcopyd_init,
	dm_interface_init,
};

static void (*_exits[])(void) = {
	local_exit,
	dm_target_exit,
	dm_linear_exit,
	dm_stripe_exit,
	dm_kcopyd_exit,
	dm_interface_exit,
};

static int __init dm_init(void)
{
	const int count = ARRAY_SIZE(_inits);

	int r, i;

	for (i = 0; i < count; i++) {
		r = _inits[i]();
		if (r)
			goto bad;
	}

	return 0;

      bad:
	while (i--)
		_exits[i]();

	return r;
}

static void __exit dm_exit(void)
{
	int i = ARRAY_SIZE(_exits);

	while (i--)
		_exits[i]();
}

/*
 * Block device functions
 */
static int dm_blk_open(struct block_device *bdev, fmode_t mode)
{
	struct mapped_device *md;

	spin_lock(&_minor_lock);

	md = bdev->bd_disk->private_data;
	if (!md)
		goto out;

	if (test_bit(DMF_FREEING, &md->flags) ||
	    test_bit(DMF_DELETING, &md->flags)) {
		md = NULL;
		goto out;
	}

	dm_get(md);
	atomic_inc(&md->open_count);

out:
	spin_unlock(&_minor_lock);

	return md ? 0 : -ENXIO;
}

static int dm_blk_close(struct gendisk *disk, fmode_t mode)
{
	struct mapped_device *md = disk->private_data;
	atomic_dec(&md->open_count);
	dm_put(md);
	return 0;
}

int dm_open_count(struct mapped_device *md)
{
	return atomic_read(&md->open_count);
}

/*
 * Guarantees nothing is using the device before it's deleted.
 */
int dm_lock_for_deletion(struct mapped_device *md)
{
	int r = 0;

	spin_lock(&_minor_lock);

	if (dm_open_count(md))
		r = -EBUSY;
	else
		set_bit(DMF_DELETING, &md->flags);

	spin_unlock(&_minor_lock);

	return r;
}

static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
	struct mapped_device *md = bdev->bd_disk->private_data;

	return dm_get_geometry(md, geo);
}

static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
			unsigned int cmd, unsigned long arg)
{
	struct mapped_device *md = bdev->bd_disk->private_data;
	struct dm_table *map = dm_get_table(md);
	struct dm_target *tgt;
	int r = -ENOTTY;

	if (!map || !dm_table_get_size(map))
		goto out;

	/* We only support devices that have a single target */
	if (dm_table_get_num_targets(map) != 1)
		goto out;

	tgt = dm_table_get_target(map, 0);

	if (dm_suspended(md)) {
		r = -EAGAIN;
		goto out;
	}

	if (tgt->type->ioctl)
		r = tgt->type->ioctl(tgt, cmd, arg);

out:
	dm_table_put(map);

	return r;
}

static struct dm_io *alloc_io(struct mapped_device *md)
{
	return mempool_alloc(md->io_pool, GFP_NOIO);
}

static void free_io(struct mapped_device *md, struct dm_io *io)
{
	mempool_free(io, md->io_pool);
}

static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
{
	mempool_free(tio, md->tio_pool);
}

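/*
 * IO accounting: charge each bio-based io to the gendisk statistics and
 * track the number of in-flight ios in md->pending, which the suspend
 * code waits on before declaring the device quiesced.
 */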
static void start_io_acct(struct dm_io *io)
{
	struct mapped_device *md = io->md;
	int cpu;

	io->start_time = jiffies;

	cpu = part_stat_lock();
	part_round_stats(cpu, &dm_disk(md)->part0);
	part_stat_unlock();
	dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending);
}

static void end_io_acct(struct dm_io *io)
{
	struct mapped_device *md = io->md;
	struct bio *bio = io->bio;
	unsigned long duration = jiffies - io->start_time;
	int pending, cpu;
	int rw = bio_data_dir(bio);

	cpu = part_stat_lock();
	part_round_stats(cpu, &dm_disk(md)->part0);
	part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
	part_stat_unlock();

	/*
	 * After this is decremented the bio must not be touched if it is
	 * a barrier.
	 */
	dm_disk(md)->part0.in_flight = pending =
		atomic_dec_return(&md->pending);

	/* nudge anyone waiting on suspend queue */
	if (!pending)
		wake_up(&md->wait);
}

/*
 * Add the bio to the list of deferred io.
 */
static void queue_io(struct mapped_device *md, struct bio *bio)
{
	down_write(&md->io_lock);

	spin_lock_irq(&md->deferred_lock);
	bio_list_add(&md->deferred, bio);
	spin_unlock_irq(&md->deferred_lock);

	if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags))
		queue_work(md->wq, &md->work);

	up_write(&md->io_lock);
}

/*
 * Everyone (including functions in this file) should use this
 * function to access the md->map field, and make sure they call
 * dm_table_put() when finished.
 */
struct dm_table *dm_get_table(struct mapped_device *md)
{
	struct dm_table *t;

	read_lock(&md->map_lock);
	t = md->map;
	if (t)
		dm_table_get(t);
	read_unlock(&md->map_lock);

	return t;
}

/*
 * Get the geometry associated with a dm device
 */
int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo)
{
	*geo = md->geometry;

	return 0;
}

/*
 * Set the geometry of a device.
 */
int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo)
{
	sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors;

	if (geo->start > sz) {
		DMWARN("Start sector is beyond the geometry limits.");
		return -EINVAL;
	}

	md->geometry = *geo;

	return 0;
}

/*-----------------------------------------------------------------
 * CRUD START:
 * A more elegant solution is in the works that uses the queue
 * merge fn, unfortunately there are a couple of changes to
 * the block layer that I want to make for this. So in the
 * interests of getting something for people to use I give
 * you this clearly demarcated crap.
 *---------------------------------------------------------------*/

static int __noflush_suspending(struct mapped_device *md)
{
	return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
}

/*
 * Decrements the number of outstanding ios that a bio has been
 * cloned into, completing the original io if necessary.
 */
static void dec_pending(struct dm_io *io, int error)
{
	unsigned long flags;
	int io_error;
	struct bio *bio;
	struct mapped_device *md = io->md;

	/* Push-back supersedes any I/O errors */
	if (error && !(io->error > 0 && __noflush_suspending(md)))
		io->error = error;

	if (atomic_dec_and_test(&io->io_count)) {
		if (io->error == DM_ENDIO_REQUEUE) {
			/*
			 * Target requested pushing back the I/O.
			 */
			spin_lock_irqsave(&md->deferred_lock, flags);
			if (__noflush_suspending(md)) {
				if (!bio_barrier(io->bio))
					bio_list_add_head(&md->deferred,
							  io->bio);
			} else
				/* noflush suspend was interrupted. */
				io->error = -EIO;
			spin_unlock_irqrestore(&md->deferred_lock, flags);
		}

		io_error = io->error;
		bio = io->bio;

		if (bio_barrier(bio)) {
			/*
			 * There can be just one barrier request so we use
			 * a per-device variable for error reporting.
			 * Note that you can't touch the bio after end_io_acct
			 */
			if (!md->barrier_error && io_error != -EOPNOTSUPP)
				md->barrier_error = io_error;
			end_io_acct(io);
		} else {
			end_io_acct(io);

			if (io_error != DM_ENDIO_REQUEUE) {
				trace_block_bio_complete(md->queue, bio);

				bio_endio(bio, io_error);
			}
		}

		free_io(md, io);
	}
}

static void clone_endio(struct bio *bio, int error)
{
	int r = 0;
	struct dm_target_io *tio = bio->bi_private;
	struct dm_io *io = tio->io;
	struct mapped_device *md = tio->io->md;
	dm_endio_fn endio = tio->ti->type->end_io;

	if (!bio_flagged(bio, BIO_UPTODATE) && !error)
		error = -EIO;

	if (endio) {
		r = endio(tio->ti, bio, error, &tio->info);
		if (r < 0 || r == DM_ENDIO_REQUEUE)
			/*
			 * error and requeue request are handled
			 * in dec_pending().
			 */
			error = r;
		else if (r == DM_ENDIO_INCOMPLETE)
			/* The target will handle the io */
			return;
		else if (r) {
			DMWARN("unimplemented target endio return value: %d", r);
			BUG();
		}
	}

	/*
	 * Store md for cleanup instead of tio which is about to get freed.
	 */
	bio->bi_private = md->bs;

	free_tio(md, tio);
	bio_put(bio);
	dec_pending(io, error);
}

static sector_t max_io_len(struct mapped_device *md,
			   sector_t sector, struct dm_target *ti)
{
	sector_t offset = sector - ti->begin;
	sector_t len = ti->len - offset;

	/*
	 * Does the target need to split even further ?
	 */
	if (ti->split_io) {
		sector_t boundary;
		boundary = ((offset + ti->split_io) & ~(ti->split_io - 1))
			   - offset;
		if (len > boundary)
			len = boundary;
	}

	return len;
}

static void __map_bio(struct dm_target *ti, struct bio *clone,
		      struct dm_target_io *tio)
{
	int r;
	sector_t sector;
	struct mapped_device *md;

	clone->bi_end_io = clone_endio;
	clone->bi_private = tio;

	/*
	 * Map the clone. If r == 0 we don't need to do
	 * anything, the target has assumed ownership of
	 * this io.
	 */
	atomic_inc(&tio->io->io_count);
	sector = clone->bi_sector;
	r = ti->type->map(ti, clone, &tio->info);
	if (r == DM_MAPIO_REMAPPED) {
		/* the bio has been remapped so dispatch it */

		trace_block_remap(bdev_get_queue(clone->bi_bdev), clone,
				  tio->io->bio->bi_bdev->bd_dev, sector);

		generic_make_request(clone);
	} else if (r < 0 || r == DM_MAPIO_REQUEUE) {
		/* error the io and bail out, or requeue it if needed */
		md = tio->io->md;
		dec_pending(tio->io, r);
		/*
		 * Store bio_set for cleanup.
		 */
		clone->bi_private = md->bs;
		bio_put(clone);
		free_tio(md, tio);
	} else if (r) {
		DMWARN("unimplemented target map return value: %d", r);
		BUG();
	}
}

struct clone_info {
	struct mapped_device *md;
	struct dm_table *map;
	struct bio *bio;
	struct dm_io *io;
	sector_t sector;
	sector_t sector_count;
	unsigned short idx;
};

static void dm_bio_destructor(struct bio *bio)
{
	struct bio_set *bs = bio->bi_private;

	bio_free(bio, bs);
}

/*
 * Creates a little bio that just does part of a bvec.
 */
static struct bio *split_bvec(struct bio *bio, sector_t sector,
			      unsigned short idx, unsigned int offset,
			      unsigned int len, struct bio_set *bs)
{
	struct bio *clone;
	struct bio_vec *bv = bio->bi_io_vec + idx;

	clone = bio_alloc_bioset(GFP_NOIO, 1, bs);
	clone->bi_destructor = dm_bio_destructor;
	*clone->bi_io_vec = *bv;

	clone->bi_sector = sector;
	clone->bi_bdev = bio->bi_bdev;
	clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER);
	clone->bi_vcnt = 1;
	clone->bi_size = to_bytes(len);
	clone->bi_io_vec->bv_offset = offset;
	clone->bi_io_vec->bv_len = clone->bi_size;
	clone->bi_flags |= 1 << BIO_CLONED;

	if (bio_integrity(bio)) {
		bio_integrity_clone(clone, bio, GFP_NOIO);
		bio_integrity_trim(clone,
				   bio_sector_offset(bio, idx, offset), len);
	}

	return clone;
}

/*
 * Creates a bio that consists of a range of complete bvecs.
 */
static struct bio *clone_bio(struct bio *bio, sector_t sector,
			     unsigned short idx, unsigned short bv_count,
			     unsigned int len, struct bio_set *bs)
{
	struct bio *clone;

	clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
	__bio_clone(clone, bio);
	clone->bi_rw &= ~(1 << BIO_RW_BARRIER);
	clone->bi_destructor = dm_bio_destructor;
	clone->bi_sector = sector;
	clone->bi_idx = idx;
	clone->bi_vcnt = idx + bv_count;
	clone->bi_size = to_bytes(len);
	clone->bi_flags &= ~(1 << BIO_SEG_VALID);

	if (bio_integrity(bio)) {
		bio_integrity_clone(clone, bio, GFP_NOIO);

		if (idx != bio->bi_idx || clone->bi_size < bio->bi_size)
			bio_integrity_trim(clone,
					   bio_sector_offset(bio, idx, 0), len);
	}

	return clone;
}

static struct dm_target_io *alloc_tio(struct clone_info *ci,
				      struct dm_target *ti)
{
	struct dm_target_io *tio = mempool_alloc(ci->md->tio_pool, GFP_NOIO);

	tio->io = ci->io;
	tio->ti = ti;
	memset(&tio->info, 0, sizeof(tio->info));

	return tio;
}

static void __flush_target(struct clone_info *ci, struct dm_target *ti,
			  unsigned flush_nr)
{
	struct dm_target_io *tio = alloc_tio(ci, ti);
	struct bio *clone;

	tio->info.flush_request = flush_nr;

	clone = bio_alloc_bioset(GFP_NOIO, 0, ci->md->bs);
	__bio_clone(clone, ci->bio);
	clone->bi_destructor = dm_bio_destructor;

	__map_bio(ti, clone, tio);
}

static int __clone_and_map_empty_barrier(struct clone_info *ci)
{
	unsigned target_nr = 0, flush_nr;
	struct dm_target *ti;

	while ((ti = dm_table_get_target(ci->map, target_nr++)))
		for (flush_nr = 0; flush_nr < ti->num_flush_requests;
		     flush_nr++)
			__flush_target(ci, ti, flush_nr);

	ci->sector_count = 0;

	return 0;
}

static int __clone_and_map(struct clone_info *ci)
{
	struct bio *clone, *bio = ci->bio;
	struct dm_target *ti;
	sector_t len = 0, max;
	struct dm_target_io *tio;

	if (unlikely(bio_empty_barrier(bio)))
		return __clone_and_map_empty_barrier(ci);

	ti = dm_table_find_target(ci->map, ci->sector);
	if (!dm_target_is_valid(ti))
		return -EIO;

	max = max_io_len(ci->md, ci->sector, ti);

	/*
	 * Allocate a target io object.
	 */
	tio = alloc_tio(ci, ti);

	if (ci->sector_count <= max) {
		/*
		 * Optimise for the simple case where we can do all of
		 * the remaining io with a single clone.
		 */
		clone = clone_bio(bio, ci->sector, ci->idx,
				  bio->bi_vcnt - ci->idx, ci->sector_count,
				  ci->md->bs);
		__map_bio(ti, clone, tio);
		ci->sector_count = 0;

	} else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) {
		/*
		 * There are some bvecs that don't span targets.
		 * Do as many of these as possible.
		 */
		int i;
		sector_t remaining = max;
		sector_t bv_len;

		for (i = ci->idx; remaining && (i < bio->bi_vcnt); i++) {
			bv_len = to_sector(bio->bi_io_vec[i].bv_len);

			if (bv_len > remaining)
				break;

			remaining -= bv_len;
			len += bv_len;
		}

		clone = clone_bio(bio, ci->sector, ci->idx, i - ci->idx, len,
				  ci->md->bs);
		__map_bio(ti, clone, tio);

		ci->sector += len;
		ci->sector_count -= len;
		ci->idx = i;

	} else {
		/*
		 * Handle a bvec that must be split between two or more targets.
		 */
		struct bio_vec *bv = bio->bi_io_vec + ci->idx;
		sector_t remaining = to_sector(bv->bv_len);
		unsigned int offset = 0;

		do {
			if (offset) {
				ti = dm_table_find_target(ci->map, ci->sector);
				if (!dm_target_is_valid(ti))
					return -EIO;

				max = max_io_len(ci->md, ci->sector, ti);

				tio = alloc_tio(ci, ti);
			}

			len = min(remaining, max);

			clone = split_bvec(bio, ci->sector, ci->idx,
					   bv->bv_offset + offset, len,
					   ci->md->bs);

			__map_bio(ti, clone, tio);

			ci->sector += len;
			ci->sector_count -= len;
			offset += to_bytes(len);
		} while (remaining -= len);

		ci->idx++;
	}

	return 0;
}

/*
 * Split the bio into several clones and submit it to targets.
 */
static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
{
	struct clone_info ci;
	int error = 0;

	ci.map = dm_get_table(md);
	if (unlikely(!ci.map)) {
		if (!bio_barrier(bio))
			bio_io_error(bio);
		else
			if (!md->barrier_error)
				md->barrier_error = -EIO;
		return;
	}

	ci.md = md;
	ci.bio = bio;
	ci.io = alloc_io(md);
	ci.io->error = 0;
	atomic_set(&ci.io->io_count, 1);
	ci.io->bio = bio;
	ci.io->md = md;
	ci.sector = bio->bi_sector;
	ci.sector_count = bio_sectors(bio);
	if (unlikely(bio_empty_barrier(bio)))
		ci.sector_count = 1;
	ci.idx = bio->bi_idx;

	start_io_acct(ci.io);
	while (ci.sector_count && !error)
		error = __clone_and_map(&ci);

	/* drop the extra reference count */
	dec_pending(ci.io, error);
	dm_table_put(ci.map);
}
/*-----------------------------------------------------------------
 * CRUD END
 *---------------------------------------------------------------*/

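/*
 * Report how many bytes may be added to a bio at the given offset
 * without forcing dm to split it, deferring to the target's merge
 * method when one is provided.
 */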
static int dm_merge_bvec(struct request_queue *q,
			 struct bvec_merge_data *bvm,
			 struct bio_vec *biovec)
{
	struct mapped_device *md = q->queuedata;
	struct dm_table *map = dm_get_table(md);
	struct dm_target *ti;
	sector_t max_sectors;
	int max_size = 0;

	if (unlikely(!map))
		goto out;

	ti = dm_table_find_target(map, bvm->bi_sector);
	if (!dm_target_is_valid(ti))
		goto out_table;

	/*
	 * Find maximum amount of I/O that won't need splitting
	 */
	max_sectors = min(max_io_len(md, bvm->bi_sector, ti),
			  (sector_t) BIO_MAX_SECTORS);
	max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
	if (max_size < 0)
		max_size = 0;

	/*
	 * merge_bvec_fn() returns number of bytes
	 * it can accept at this offset
	 * max is precomputed maximal io size
	 */
	if (max_size && ti->type->merge)
		max_size = ti->type->merge(ti, bvm, biovec, max_size);
	/*
	 * If the target doesn't support merge method and some of the devices
	 * provided their merge_bvec method (we know this by looking at
	 * queue_max_hw_sectors), then we can't allow bios with multiple vector
	 * entries.  So always set max_size to 0, and the code below allows
	 * just one page.
	 */
	else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9)

		max_size = 0;

out_table:
	dm_table_put(map);

out:
	/*
	 * Always allow an entire first page
	 */
	if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT))
		max_size = biovec->bv_len;

	return max_size;
}

/*
 * The request function that just remaps the bio built up by
 * dm_merge_bvec.
 */
static int dm_request(struct request_queue *q, struct bio *bio)
{
	int rw = bio_data_dir(bio);
	struct mapped_device *md = q->queuedata;
	int cpu;

	down_read(&md->io_lock);

	cpu = part_stat_lock();
	part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]);
	part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio));
	part_stat_unlock();

	/*
	 * If we're suspended or the thread is processing barriers
	 * we have to queue this io for later.
	 */
	if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
	    unlikely(bio_barrier(bio))) {
		up_read(&md->io_lock);

		if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
		    bio_rw(bio) == READA) {
			bio_io_error(bio);
			return 0;
		}

		queue_io(md, bio);

		return 0;
	}

	__split_and_process_bio(md, bio);
	up_read(&md->io_lock);
	return 0;
}

static void dm_unplug_all(struct request_queue *q)
{
	struct mapped_device *md = q->queuedata;
	struct dm_table *map = dm_get_table(md);

	if (map) {
		dm_table_unplug_all(map);
		dm_table_put(map);
	}
}

static int dm_any_congested(void *congested_data, int bdi_bits)
{
	int r = bdi_bits;
	struct mapped_device *md = congested_data;
	struct dm_table *map;

	if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
		map = dm_get_table(md);
		if (map) {
			r = dm_table_any_congested(map, bdi_bits);
			dm_table_put(map);
		}
	}

	return r;
}

/*-----------------------------------------------------------------
 * An IDR is used to keep track of allocated minor numbers.
 *---------------------------------------------------------------*/
static DEFINE_IDR(_minor_idr);

static void free_minor(int minor)
{
	spin_lock(&_minor_lock);
	idr_remove(&_minor_idr, minor);
	spin_unlock(&_minor_lock);
}

/*
 * See if the device with a specific minor # is free.
 */
static int specific_minor(int minor)
{
	int r, m;

	if (minor >= (1 << MINORBITS))
		return -EINVAL;

	r = idr_pre_get(&_minor_idr, GFP_KERNEL);
	if (!r)
		return -ENOMEM;

	spin_lock(&_minor_lock);

	if (idr_find(&_minor_idr, minor)) {
		r = -EBUSY;
		goto out;
	}

	r = idr_get_new_above(&_minor_idr, MINOR_ALLOCED, minor, &m);
	if (r)
		goto out;

	if (m != minor) {
		idr_remove(&_minor_idr, m);
		r = -EBUSY;
		goto out;
	}

out:
	spin_unlock(&_minor_lock);
	return r;
}

static int next_free_minor(int *minor)
{
	int r, m;

	r = idr_pre_get(&_minor_idr, GFP_KERNEL);
	if (!r)
		return -ENOMEM;

	spin_lock(&_minor_lock);

	r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m);
	if (r)
		goto out;

	if (m >= (1 << MINORBITS)) {
		idr_remove(&_minor_idr, m);
		r = -ENOSPC;
		goto out;
	}

	*minor = m;

out:
	spin_unlock(&_minor_lock);
	return r;
}

static struct block_device_operations dm_blk_dops;

static void dm_wq_work(struct work_struct *work);

/*
 * Allocate and initialise a blank device with a given minor.
 */
static struct mapped_device *alloc_dev(int minor)
{
	int r;
	struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL);
	void *old_md;

	if (!md) {
		DMWARN("unable to allocate device, out of memory.");
		return NULL;
	}

	if (!try_module_get(THIS_MODULE))
		goto bad_module_get;

	/* get a minor number for the dev */
	if (minor == DM_ANY_MINOR)
		r = next_free_minor(&minor);
	else
		r = specific_minor(minor);
	if (r < 0)
		goto bad_minor;

	init_rwsem(&md->io_lock);
	mutex_init(&md->suspend_lock);
	spin_lock_init(&md->deferred_lock);
	rwlock_init(&md->map_lock);
	atomic_set(&md->holders, 1);
	atomic_set(&md->open_count, 0);
	atomic_set(&md->event_nr, 0);
	atomic_set(&md->uevent_seq, 0);
	INIT_LIST_HEAD(&md->uevent_list);
	spin_lock_init(&md->uevent_lock);

	md->queue = blk_alloc_queue(GFP_KERNEL);
	if (!md->queue)
		goto bad_queue;

	md->queue->queuedata = md;
	md->queue->backing_dev_info.congested_fn = dm_any_congested;
	md->queue->backing_dev_info.congested_data = md;
	blk_queue_make_request(md->queue, dm_request);
	blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL);
	blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
	md->queue->unplug_fn = dm_unplug_all;
	blk_queue_merge_bvec(md->queue, dm_merge_bvec);

	md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache);
	if (!md->io_pool)
		goto bad_io_pool;

	md->tio_pool = mempool_create_slab_pool(MIN_IOS, _tio_cache);
	if (!md->tio_pool)
		goto bad_tio_pool;

	md->bs = bioset_create(16, 0);
	if (!md->bs)
		goto bad_no_bioset;

	md->disk = alloc_disk(1);
	if (!md->disk)
		goto bad_disk;

	atomic_set(&md->pending, 0);
	init_waitqueue_head(&md->wait);
	INIT_WORK(&md->work, dm_wq_work);
	init_waitqueue_head(&md->eventq);

	md->disk->major = _major;
	md->disk->first_minor = minor;
	md->disk->fops = &dm_blk_dops;
	md->disk->queue = md->queue;
	md->disk->private_data = md;
	sprintf(md->disk->disk_name, "dm-%d", minor);
	add_disk(md->disk);
	format_dev_t(md->name, MKDEV(_major, minor));

	md->wq = create_singlethread_workqueue("kdmflush");
	if (!md->wq)
		goto bad_thread;

	md->bdev = bdget_disk(md->disk, 0);
	if (!md->bdev)
		goto bad_bdev;

	/* Populate the mapping, nobody knows we exist yet */
	spin_lock(&_minor_lock);
	old_md = idr_replace(&_minor_idr, md, minor);
	spin_unlock(&_minor_lock);

	BUG_ON(old_md != MINOR_ALLOCED);

	return md;

bad_bdev:
	destroy_workqueue(md->wq);
bad_thread:
	put_disk(md->disk);
bad_disk:
	bioset_free(md->bs);
bad_no_bioset:
	mempool_destroy(md->tio_pool);
bad_tio_pool:
	mempool_destroy(md->io_pool);
bad_io_pool:
	blk_cleanup_queue(md->queue);
bad_queue:
	free_minor(minor);
bad_minor:
	module_put(THIS_MODULE);
bad_module_get:
	kfree(md);
	return NULL;
}
1261
Jun'ichi Nomuraae9da832007-10-19 22:38:43 +01001262static void unlock_fs(struct mapped_device *md);
1263
Linus Torvalds1da177e2005-04-16 15:20:36 -07001264static void free_dev(struct mapped_device *md)
1265{
Tejun Heof331c022008-09-03 09:01:48 +02001266 int minor = MINOR(disk_devt(md->disk));
Jun'ichi Nomura63d94e42006-02-24 13:04:25 -08001267
Mikulas Patocka32a926d2009-06-22 10:12:17 +01001268 unlock_fs(md);
1269 bdput(md->bdev);
Milan Broz304f3f62008-02-08 02:11:17 +00001270 destroy_workqueue(md->wq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001271 mempool_destroy(md->tio_pool);
1272 mempool_destroy(md->io_pool);
Stefan Bader9faf4002006-10-03 01:15:41 -07001273 bioset_free(md->bs);
Martin K. Petersen9c470082009-04-09 00:27:12 +01001274 blk_integrity_unregister(md->disk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275 del_gendisk(md->disk);
Jun'ichi Nomura63d94e42006-02-24 13:04:25 -08001276 free_minor(minor);
Jeff Mahoneyfba9f902006-06-26 00:27:23 -07001277
1278 spin_lock(&_minor_lock);
1279 md->disk->private_data = NULL;
1280 spin_unlock(&_minor_lock);
1281
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282 put_disk(md->disk);
Al Viro1312f402006-03-12 11:02:03 -05001283 blk_cleanup_queue(md->queue);
Jeff Mahoney10da4f72006-06-26 00:27:25 -07001284 module_put(THIS_MODULE);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001285 kfree(md);
1286}
1287
1288/*
1289 * Bind a table to the device.
1290 */
1291static void event_callback(void *context)
1292{
Mike Anderson7a8c3d32007-10-19 22:48:01 +01001293 unsigned long flags;
1294 LIST_HEAD(uevents);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295 struct mapped_device *md = (struct mapped_device *) context;
1296
Mike Anderson7a8c3d32007-10-19 22:48:01 +01001297 spin_lock_irqsave(&md->uevent_lock, flags);
1298 list_splice_init(&md->uevent_list, &uevents);
1299 spin_unlock_irqrestore(&md->uevent_lock, flags);
1300
Tejun Heoed9e1982008-08-25 19:56:05 +09001301 dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);
Mike Anderson7a8c3d32007-10-19 22:48:01 +01001302
Linus Torvalds1da177e2005-04-16 15:20:36 -07001303 atomic_inc(&md->event_nr);
1304 wake_up(&md->eventq);
1305}
1306
Alasdair G Kergon4e901882005-07-28 21:15:59 -07001307static void __set_size(struct mapped_device *md, sector_t size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001308{
Alasdair G Kergon4e901882005-07-28 21:15:59 -07001309 set_capacity(md->disk, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001310
Mikulas Patockadb8fef42009-06-22 10:12:15 +01001311 mutex_lock(&md->bdev->bd_inode->i_mutex);
1312 i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
1313 mutex_unlock(&md->bdev->bd_inode->i_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001314}
1315
1316static int __bind(struct mapped_device *md, struct dm_table *t)
1317{
Jens Axboe165125e2007-07-24 09:28:11 +02001318 struct request_queue *q = md->queue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001319 sector_t size;
1320
1321 size = dm_table_get_size(t);
Darrick J. Wong3ac51e72006-03-27 01:17:54 -08001322
1323 /*
1324 * Wipe any geometry if the size of the table changed.
1325 */
1326 if (size != get_capacity(md->disk))
1327 memset(&md->geometry, 0, sizeof(md->geometry));
1328
Mikulas Patocka32a926d2009-06-22 10:12:17 +01001329 __set_size(md, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001330
Mikulas Patockad5816872009-01-06 03:05:10 +00001331 if (!size) {
1332 dm_table_destroy(t);
1333 return 0;
1334 }
1335
Alasdair G Kergoncf222b32005-07-28 21:15:57 -07001336 dm_table_event_callback(t, event_callback, md);
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07001337
1338 write_lock(&md->map_lock);
1339 md->map = t;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001340 dm_table_set_restrictions(t, q);
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07001341 write_unlock(&md->map_lock);
1342
Linus Torvalds1da177e2005-04-16 15:20:36 -07001343 return 0;
1344}
1345
1346static void __unbind(struct mapped_device *md)
1347{
1348 struct dm_table *map = md->map;
1349
1350 if (!map)
1351 return;
1352
1353 dm_table_event_callback(map, NULL, NULL);
1354 write_lock(&md->map_lock);
1355 md->map = NULL;
1356 write_unlock(&md->map_lock);
Mikulas Patockad5816872009-01-06 03:05:10 +00001357 dm_table_destroy(map);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001358}
1359
1360/*
1361 * Constructor for a new device.
1362 */
Alasdair G Kergon2b06cff2006-06-26 00:27:32 -07001363int dm_create(int minor, struct mapped_device **result)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001364{
1365 struct mapped_device *md;
1366
Alasdair G Kergon2b06cff2006-06-26 00:27:32 -07001367 md = alloc_dev(minor);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001368 if (!md)
1369 return -ENXIO;
1370
Milan Broz784aae72009-01-06 03:05:12 +00001371 dm_sysfs_init(md);
1372
Linus Torvalds1da177e2005-04-16 15:20:36 -07001373 *result = md;
1374 return 0;
1375}
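/*
 * A sketch of how a caller such as dm-ioctl typically drives this: create
 * the device, attach its own bookkeeping with dm_set_mdptr(), and drop the
 * initial reference with dm_put() when finished (hash_cell below is just an
 * illustrative name for that bookkeeping):
 *
 *	struct mapped_device *md;
 *	int r = dm_create(minor, &md);
 *
 *	if (r)
 *		return r;
 *	dm_set_mdptr(md, hash_cell);
 *	...
 *	dm_put(md);
 */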
1376
David Teigland637842c2006-01-06 00:20:00 -08001377static struct mapped_device *dm_find_md(dev_t dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001378{
1379 struct mapped_device *md;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 unsigned minor = MINOR(dev);
1381
1382 if (MAJOR(dev) != _major || minor >= (1 << MINORBITS))
1383 return NULL;
1384
Jeff Mahoneyf32c10b2006-06-26 00:27:22 -07001385 spin_lock(&_minor_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001386
1387 md = idr_find(&_minor_idr, minor);
Jeff Mahoneyfba9f902006-06-26 00:27:23 -07001388 if (md && (md == MINOR_ALLOCED ||
Tejun Heof331c022008-09-03 09:01:48 +02001389 (MINOR(disk_devt(dm_disk(md))) != minor) ||
Alasdair G Kergon17b2f662006-06-26 00:27:33 -07001390 test_bit(DMF_FREEING, &md->flags))) {
David Teigland637842c2006-01-06 00:20:00 -08001391 md = NULL;
Jeff Mahoneyfba9f902006-06-26 00:27:23 -07001392 goto out;
1393 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001394
Jeff Mahoneyfba9f902006-06-26 00:27:23 -07001395out:
Jeff Mahoneyf32c10b2006-06-26 00:27:22 -07001396 spin_unlock(&_minor_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001397
David Teigland637842c2006-01-06 00:20:00 -08001398 return md;
1399}
1400
David Teiglandd229a952006-01-06 00:20:01 -08001401struct mapped_device *dm_get_md(dev_t dev)
1402{
1403 struct mapped_device *md = dm_find_md(dev);
1404
1405 if (md)
1406 dm_get(md);
1407
1408 return md;
1409}
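/*
 * dm_get_md() hands back the device with a reference already held (or NULL
 * if the minor is unknown or being freed), so callers pair it with dm_put().
 * A minimal sketch, the dev_t being purely illustrative:
 *
 *	struct mapped_device *md = dm_get_md(MKDEV(_major, 5));
 *
 *	if (!md)
 *		return -ENXIO;
 *	...
 *	dm_put(md);
 */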
1410
Alasdair G Kergon9ade92a2006-03-27 01:17:53 -08001411void *dm_get_mdptr(struct mapped_device *md)
David Teigland637842c2006-01-06 00:20:00 -08001412{
Alasdair G Kergon9ade92a2006-03-27 01:17:53 -08001413 return md->interface_ptr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001414}
1415
1416void dm_set_mdptr(struct mapped_device *md, void *ptr)
1417{
1418 md->interface_ptr = ptr;
1419}
1420
1421void dm_get(struct mapped_device *md)
1422{
1423 atomic_inc(&md->holders);
1424}
1425
Alasdair G Kergon72d94862006-06-26 00:27:35 -07001426const char *dm_device_name(struct mapped_device *md)
1427{
1428 return md->name;
1429}
1430EXPORT_SYMBOL_GPL(dm_device_name);
1431
Linus Torvalds1da177e2005-04-16 15:20:36 -07001432void dm_put(struct mapped_device *md)
1433{
Mike Anderson1134e5a2006-03-27 01:17:54 -08001434 struct dm_table *map;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001435
Jeff Mahoneyfba9f902006-06-26 00:27:23 -07001436 BUG_ON(test_bit(DMF_FREEING, &md->flags));
1437
Jeff Mahoneyf32c10b2006-06-26 00:27:22 -07001438 if (atomic_dec_and_lock(&md->holders, &_minor_lock)) {
Mike Anderson1134e5a2006-03-27 01:17:54 -08001439 map = dm_get_table(md);
Tejun Heof331c022008-09-03 09:01:48 +02001440 idr_replace(&_minor_idr, MINOR_ALLOCED,
1441 MINOR(disk_devt(dm_disk(md))));
Jeff Mahoneyfba9f902006-06-26 00:27:23 -07001442 set_bit(DMF_FREEING, &md->flags);
Jeff Mahoneyf32c10b2006-06-26 00:27:22 -07001443 spin_unlock(&_minor_lock);
Alasdair G Kergoncf222b32005-07-28 21:15:57 -07001444 if (!dm_suspended(md)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001445 dm_table_presuspend_targets(map);
1446 dm_table_postsuspend_targets(map);
1447 }
Milan Broz784aae72009-01-06 03:05:12 +00001448 dm_sysfs_exit(md);
Mike Anderson1134e5a2006-03-27 01:17:54 -08001449 dm_table_put(map);
Mikulas Patockaa1b51e92009-01-06 03:04:53 +00001450 __unbind(md);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001451 free_dev(md);
1452 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001453}
Edward Goggin79eb8852007-05-09 02:32:56 -07001454EXPORT_SYMBOL_GPL(dm_put);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001455
Mikulas Patocka401600d2009-04-02 19:55:38 +01001456static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
Milan Broz46125c12008-02-08 02:10:30 +00001457{
1458 int r = 0;
Mikulas Patockab44ebeb2009-04-02 19:55:39 +01001459 DECLARE_WAITQUEUE(wait, current);
1460
1461 dm_unplug_all(md->queue);
1462
1463 add_wait_queue(&md->wait, &wait);
Milan Broz46125c12008-02-08 02:10:30 +00001464
1465 while (1) {
Mikulas Patocka401600d2009-04-02 19:55:38 +01001466 set_current_state(interruptible);
Milan Broz46125c12008-02-08 02:10:30 +00001467
1468 smp_mb();
1469 if (!atomic_read(&md->pending))
1470 break;
1471
Mikulas Patocka401600d2009-04-02 19:55:38 +01001472 if (interruptible == TASK_INTERRUPTIBLE &&
1473 signal_pending(current)) {
Milan Broz46125c12008-02-08 02:10:30 +00001474 r = -EINTR;
1475 break;
1476 }
1477
1478 io_schedule();
1479 }
1480 set_current_state(TASK_RUNNING);
1481
Mikulas Patockab44ebeb2009-04-02 19:55:39 +01001482 remove_wait_queue(&md->wait, &wait);
1483
Milan Broz46125c12008-02-08 02:10:30 +00001484 return r;
1485}
1486
Mikulas Patocka531fe962009-06-22 10:12:17 +01001487static void dm_flush(struct mapped_device *md)
Mikulas Patockaaf7e4662009-04-09 00:27:16 +01001488{
1489 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
Mikulas Patocka52b1fd52009-06-22 10:12:21 +01001490
1491 bio_init(&md->barrier_bio);
1492 md->barrier_bio.bi_bdev = md->bdev;
1493 md->barrier_bio.bi_rw = WRITE_BARRIER;
1494 __split_and_process_bio(md, &md->barrier_bio);
1495
1496 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
Mikulas Patockaaf7e4662009-04-09 00:27:16 +01001497}
1498
1499static void process_barrier(struct mapped_device *md, struct bio *bio)
1500{
Mikulas Patocka5aa27812009-06-22 10:12:18 +01001501 md->barrier_error = 0;
1502
Mikulas Patocka531fe962009-06-22 10:12:17 +01001503 dm_flush(md);
Mikulas Patockaaf7e4662009-04-09 00:27:16 +01001504
Mikulas Patocka5aa27812009-06-22 10:12:18 +01001505 if (!bio_empty_barrier(bio)) {
1506 __split_and_process_bio(md, bio);
1507 dm_flush(md);
Mikulas Patockaaf7e4662009-04-09 00:27:16 +01001508 }
1509
Mikulas Patockaaf7e4662009-04-09 00:27:16 +01001510 if (md->barrier_error != DM_ENDIO_REQUEUE)
Mikulas Patocka531fe962009-06-22 10:12:17 +01001511 bio_endio(bio, md->barrier_error);
Mikulas Patocka2761e952009-06-22 10:12:18 +01001512 else {
1513 spin_lock_irq(&md->deferred_lock);
1514 bio_list_add_head(&md->deferred, bio);
1515 spin_unlock_irq(&md->deferred_lock);
1516 }
Mikulas Patockaaf7e4662009-04-09 00:27:16 +01001517}
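/*
 * In other words, an empty barrier becomes a single dm_flush(), while a
 * barrier carrying data is handled as flush, write, flush.  If a target
 * returned DM_ENDIO_REQUEUE, the barrier is pushed back onto the head of
 * the deferred list so that dm_wq_work() below retries it later.
 */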
1518
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519/*
1520 * Process the deferred bios
1521 */
Mikulas Patockaef208582009-04-02 19:55:38 +01001522static void dm_wq_work(struct work_struct *work)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001523{
Mikulas Patockaef208582009-04-02 19:55:38 +01001524 struct mapped_device *md = container_of(work, struct mapped_device,
1525 work);
Milan Broz6d6f10d2008-02-08 02:10:22 +00001526 struct bio *c;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001527
Mikulas Patockaef208582009-04-02 19:55:38 +01001528 down_write(&md->io_lock);
1529
Mikulas Patocka3b00b202009-04-09 00:27:15 +01001530 while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
Alasdair G Kergondf12ee92009-04-09 00:27:13 +01001531 spin_lock_irq(&md->deferred_lock);
1532 c = bio_list_pop(&md->deferred);
1533 spin_unlock_irq(&md->deferred_lock);
Mikulas Patocka022c2612009-04-02 19:55:39 +01001534
Alasdair G Kergondf12ee92009-04-09 00:27:13 +01001535 if (!c) {
Alasdair G Kergon1eb787e2009-04-09 00:27:14 +01001536 clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
Alasdair G Kergondf12ee92009-04-09 00:27:13 +01001537 break;
1538 }
1539
Mikulas Patocka3b00b202009-04-09 00:27:15 +01001540 up_write(&md->io_lock);
1541
Mikulas Patockaaf7e4662009-04-09 00:27:16 +01001542 if (bio_barrier(c))
1543 process_barrier(md, c);
1544 else
1545 __split_and_process_bio(md, c);
Mikulas Patocka3b00b202009-04-09 00:27:15 +01001546
1547 down_write(&md->io_lock);
Mikulas Patocka022c2612009-04-02 19:55:39 +01001548 }
Milan Broz73d410c2008-02-08 02:10:25 +00001549
Mikulas Patockaef208582009-04-02 19:55:38 +01001550 up_write(&md->io_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001551}
1552
Mikulas Patocka9a1fb462009-04-02 19:55:36 +01001553static void dm_queue_flush(struct mapped_device *md)
Milan Broz304f3f62008-02-08 02:11:17 +00001554{
Mikulas Patocka3b00b202009-04-09 00:27:15 +01001555 clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
1556 smp_mb__after_clear_bit();
Mikulas Patocka53d59142009-04-02 19:55:37 +01001557 queue_work(md->wq, &md->work);
Milan Broz304f3f62008-02-08 02:11:17 +00001558}
1559
Linus Torvalds1da177e2005-04-16 15:20:36 -07001560/*
1561 * Swap in a new table (destroying old one).
1562 */
1563int dm_swap_table(struct mapped_device *md, struct dm_table *table)
1564{
Alasdair G Kergon93c534a2005-07-12 15:53:05 -07001565 int r = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001566
Daniel Walkere61290a2008-02-08 02:10:08 +00001567 mutex_lock(&md->suspend_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001568
1569 /* device must be suspended */
Alasdair G Kergoncf222b32005-07-28 21:15:57 -07001570 if (!dm_suspended(md))
Alasdair G Kergon93c534a2005-07-12 15:53:05 -07001571 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572
1573 __unbind(md);
1574 r = __bind(md, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001575
Alasdair G Kergon93c534a2005-07-12 15:53:05 -07001576out:
Daniel Walkere61290a2008-02-08 02:10:08 +00001577 mutex_unlock(&md->suspend_lock);
Alasdair G Kergon93c534a2005-07-12 15:53:05 -07001578 return r;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001579}
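/*
 * Seen from a caller such as dm-ioctl, replacing a live table is roughly
 * the sequence below (error handling trimmed, new_table is illustrative):
 *
 *	dm_suspend(md, DM_SUSPEND_LOCKFS_FLAG);
 *	dm_swap_table(md, new_table);
 *	dm_resume(md);
 */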
1580
1581/*
1582 * Functions to lock and unlock any filesystem running on the
1583 * device.
1584 */
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07001585static int lock_fs(struct mapped_device *md)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001586{
Alasdair G Kergone39e2e92006-01-06 00:20:05 -08001587 int r;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001588
1589 WARN_ON(md->frozen_sb);
Alasdair G Kergondfbe03f2005-05-05 16:16:04 -07001590
Mikulas Patockadb8fef42009-06-22 10:12:15 +01001591 md->frozen_sb = freeze_bdev(md->bdev);
Alasdair G Kergondfbe03f2005-05-05 16:16:04 -07001592 if (IS_ERR(md->frozen_sb)) {
Alasdair G Kergoncf222b32005-07-28 21:15:57 -07001593 r = PTR_ERR(md->frozen_sb);
Alasdair G Kergone39e2e92006-01-06 00:20:05 -08001594 md->frozen_sb = NULL;
1595 return r;
Alasdair G Kergondfbe03f2005-05-05 16:16:04 -07001596 }
1597
Alasdair G Kergonaa8d7c22006-01-06 00:20:06 -08001598 set_bit(DMF_FROZEN, &md->flags);
1599
Linus Torvalds1da177e2005-04-16 15:20:36 -07001600 return 0;
1601}
1602
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07001603static void unlock_fs(struct mapped_device *md)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001604{
Alasdair G Kergonaa8d7c22006-01-06 00:20:06 -08001605 if (!test_bit(DMF_FROZEN, &md->flags))
1606 return;
1607
Mikulas Patockadb8fef42009-06-22 10:12:15 +01001608 thaw_bdev(md->bdev, md->frozen_sb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609 md->frozen_sb = NULL;
Alasdair G Kergonaa8d7c22006-01-06 00:20:06 -08001610 clear_bit(DMF_FROZEN, &md->flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001611}
1612
1613/*
1614 * We need to be able to change a mapping table under a mounted
1615 * filesystem. For example we might want to move some data in
1616 * the background. Before the table can be swapped with
 1617 * dm_swap_table, dm_suspend must be called to flush any in-flight
 1618 * bios and ensure that any further I/O gets deferred.
1619 */
Kiyoshi Uedaa3d77d32006-12-08 02:41:04 -08001620int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001621{
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07001622 struct dm_table *map = NULL;
Milan Broz46125c12008-02-08 02:10:30 +00001623 int r = 0;
Kiyoshi Uedaa3d77d32006-12-08 02:41:04 -08001624 int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
Kiyoshi Ueda2e93ccc2006-12-08 02:41:09 -08001625 int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001626
Daniel Walkere61290a2008-02-08 02:10:08 +00001627 mutex_lock(&md->suspend_lock);
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07001628
Milan Broz73d410c2008-02-08 02:10:25 +00001629 if (dm_suspended(md)) {
1630 r = -EINVAL;
Alasdair G Kergond2874832006-11-08 17:44:43 -08001631 goto out_unlock;
Milan Broz73d410c2008-02-08 02:10:25 +00001632 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001633
1634 map = dm_get_table(md);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635
Kiyoshi Ueda2e93ccc2006-12-08 02:41:09 -08001636 /*
1637 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
1638 * This flag is cleared before dm_suspend returns.
1639 */
1640 if (noflush)
1641 set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
1642
Alasdair G Kergoncf222b32005-07-28 21:15:57 -07001643 /* This does not get reverted if there's an error later. */
1644 dm_table_presuspend_targets(map);
1645
Mikulas Patocka32a926d2009-06-22 10:12:17 +01001646 /*
1647 * Flush I/O to the device. noflush supersedes do_lockfs,
1648 * because lock_fs() needs to flush I/Os.
1649 */
1650 if (!noflush && do_lockfs) {
1651 r = lock_fs(md);
1652 if (r)
Kiyoshi Uedaf431d962008-10-21 17:45:07 +01001653 goto out;
Alasdair G Kergonaa8d7c22006-01-06 00:20:06 -08001654 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001655
1656 /*
Mikulas Patocka3b00b202009-04-09 00:27:15 +01001657 * Here we must make sure that no processes are submitting requests
1658 * to target drivers i.e. no one may be executing
1659 * __split_and_process_bio. This is called from dm_request and
1660 * dm_wq_work.
1661 *
1662 * To get all processes out of __split_and_process_bio in dm_request,
1663 * we take the write lock. To prevent any process from reentering
1664 * __split_and_process_bio from dm_request, we set
1665 * DMF_QUEUE_IO_TO_THREAD.
1666 *
1667 * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND
1668 * and call flush_workqueue(md->wq). flush_workqueue will wait until
1669 * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any
1670 * further calls to __split_and_process_bio from dm_wq_work.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001671 */
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07001672 down_write(&md->io_lock);
Alasdair G Kergon1eb787e2009-04-09 00:27:14 +01001673 set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
1674 set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07001675 up_write(&md->io_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676
Mikulas Patocka3b00b202009-04-09 00:27:15 +01001677 flush_workqueue(md->wq);
1678
Linus Torvalds1da177e2005-04-16 15:20:36 -07001679 /*
Mikulas Patocka3b00b202009-04-09 00:27:15 +01001680 * At this point no more requests are entering target request routines.
1681 * We call dm_wait_for_completion to wait for all existing requests
1682 * to finish.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 */
Mikulas Patocka401600d2009-04-02 19:55:38 +01001684 r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001685
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07001686 down_write(&md->io_lock);
Milan Broz6d6f10d2008-02-08 02:10:22 +00001687 if (noflush)
Mikulas Patocka022c2612009-04-02 19:55:39 +01001688 clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
Milan Broz94d63512008-02-08 02:10:27 +00001689 up_write(&md->io_lock);
Kiyoshi Ueda2e93ccc2006-12-08 02:41:09 -08001690
Linus Torvalds1da177e2005-04-16 15:20:36 -07001691	/* were we interrupted? */
Milan Broz46125c12008-02-08 02:10:30 +00001692 if (r < 0) {
Mikulas Patocka9a1fb462009-04-02 19:55:36 +01001693 dm_queue_flush(md);
Milan Broz73d410c2008-02-08 02:10:25 +00001694
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07001695 unlock_fs(md);
Kiyoshi Ueda2e93ccc2006-12-08 02:41:09 -08001696 goto out; /* pushback list is already flushed, so skip flush */
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07001697 }
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07001698
Mikulas Patocka3b00b202009-04-09 00:27:15 +01001699 /*
1700 * If dm_wait_for_completion returned 0, the device is completely
1701 * quiescent now. There is no request-processing activity. All new
1702 * requests are being added to md->deferred list.
1703 */
1704
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07001705 dm_table_postsuspend_targets(map);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706
1707 set_bit(DMF_SUSPENDED, &md->flags);
1708
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07001709out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710 dm_table_put(map);
Alasdair G Kergond2874832006-11-08 17:44:43 -08001711
1712out_unlock:
Daniel Walkere61290a2008-02-08 02:10:08 +00001713 mutex_unlock(&md->suspend_lock);
Alasdair G Kergoncf222b32005-07-28 21:15:57 -07001714 return r;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001715}
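/*
 * The suspend_flags bits referenced above combine roughly as follows:
 *
 *	dm_suspend(md, DM_SUSPEND_LOCKFS_FLAG)	freeze the fs, then flush I/O
 *	dm_suspend(md, DM_SUSPEND_NOFLUSH_FLAG)	skip flushing; targets may
 *						requeue I/O with DM_ENDIO_REQUEUE
 *	dm_suspend(md, 0)			flush I/O without freezing the fs
 */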
1716
1717int dm_resume(struct mapped_device *md)
1718{
Alasdair G Kergoncf222b32005-07-28 21:15:57 -07001719 int r = -EINVAL;
Alasdair G Kergoncf222b32005-07-28 21:15:57 -07001720 struct dm_table *map = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001721
Daniel Walkere61290a2008-02-08 02:10:08 +00001722 mutex_lock(&md->suspend_lock);
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07001723 if (!dm_suspended(md))
Alasdair G Kergoncf222b32005-07-28 21:15:57 -07001724 goto out;
Alasdair G Kergoncf222b32005-07-28 21:15:57 -07001725
1726 map = dm_get_table(md);
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07001727 if (!map || !dm_table_get_size(map))
Alasdair G Kergoncf222b32005-07-28 21:15:57 -07001728 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001729
Milan Broz8757b772006-10-03 01:15:36 -07001730 r = dm_table_resume_targets(map);
1731 if (r)
1732 goto out;
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07001733
Mikulas Patocka9a1fb462009-04-02 19:55:36 +01001734 dm_queue_flush(md);
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07001735
1736 unlock_fs(md);
1737
1738 clear_bit(DMF_SUSPENDED, &md->flags);
1739
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740 dm_table_unplug_all(map);
Alasdair G Kergoncf222b32005-07-28 21:15:57 -07001741 r = 0;
1742out:
1743 dm_table_put(map);
Daniel Walkere61290a2008-02-08 02:10:08 +00001744 mutex_unlock(&md->suspend_lock);
Alasdair G Kergon2ca33102005-07-28 21:16:00 -07001745
Alasdair G Kergoncf222b32005-07-28 21:15:57 -07001746 return r;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747}
1748
1749/*-----------------------------------------------------------------
1750 * Event notification.
1751 *---------------------------------------------------------------*/
Milan Broz60935eb2009-06-22 10:12:30 +01001752void dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
1753 unsigned cookie)
Alasdair G Kergon69267a32007-12-13 14:15:57 +00001754{
Milan Broz60935eb2009-06-22 10:12:30 +01001755 char udev_cookie[DM_COOKIE_LENGTH];
1756 char *envp[] = { udev_cookie, NULL };
1757
1758 if (!cookie)
1759 kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
1760 else {
1761 snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
1762 DM_COOKIE_ENV_VAR_NAME, cookie);
1763 kobject_uevent_env(&disk_to_dev(md->disk)->kobj, action, envp);
1764 }
Alasdair G Kergon69267a32007-12-13 14:15:57 +00001765}
1766
Mike Anderson7a8c3d32007-10-19 22:48:01 +01001767uint32_t dm_next_uevent_seq(struct mapped_device *md)
1768{
1769 return atomic_add_return(1, &md->uevent_seq);
1770}
1771
Linus Torvalds1da177e2005-04-16 15:20:36 -07001772uint32_t dm_get_event_nr(struct mapped_device *md)
1773{
1774 return atomic_read(&md->event_nr);
1775}
1776
1777int dm_wait_event(struct mapped_device *md, int event_nr)
1778{
1779 return wait_event_interruptible(md->eventq,
1780 (event_nr != atomic_read(&md->event_nr)));
1781}
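/*
 * A minimal sketch of the intended use: sample the counter, do some work,
 * then sleep until the counter moves on:
 *
 *	uint32_t ev = dm_get_event_nr(md);
 *	...
 *	if (dm_wait_event(md, ev))
 *		return -ERESTARTSYS;	interrupted by a signal
 */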
1782
Mike Anderson7a8c3d32007-10-19 22:48:01 +01001783void dm_uevent_add(struct mapped_device *md, struct list_head *elist)
1784{
1785 unsigned long flags;
1786
1787 spin_lock_irqsave(&md->uevent_lock, flags);
1788 list_add(elist, &md->uevent_list);
1789 spin_unlock_irqrestore(&md->uevent_lock, flags);
1790}
1791
Linus Torvalds1da177e2005-04-16 15:20:36 -07001792/*
1793 * The gendisk is only valid as long as you have a reference
1794 * count on 'md'.
1795 */
1796struct gendisk *dm_disk(struct mapped_device *md)
1797{
1798 return md->disk;
1799}
1800
Milan Broz784aae72009-01-06 03:05:12 +00001801struct kobject *dm_kobject(struct mapped_device *md)
1802{
1803 return &md->kobj;
1804}
1805
1806/*
1807 * struct mapped_device should not be exported outside of dm.c
 1808 * so use this check to verify that kobj is part of the md structure.
1809 */
1810struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
1811{
1812 struct mapped_device *md;
1813
1814 md = container_of(kobj, struct mapped_device, kobj);
1815 if (&md->kobj != kobj)
1816 return NULL;
1817
Milan Broz4d89b7b2009-06-22 10:12:11 +01001818 if (test_bit(DMF_FREEING, &md->flags) ||
1819 test_bit(DMF_DELETING, &md->flags))
1820 return NULL;
1821
Milan Broz784aae72009-01-06 03:05:12 +00001822 dm_get(md);
1823 return md;
1824}
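/*
 * dm-sysfs.c uses this from its attribute ->show() callbacks, roughly:
 *
 *	struct mapped_device *md = dm_get_from_kobject(kobj);
 *
 *	if (!md)
 *		return -EINVAL;
 *	ret = sprintf(buf, "%s\n", dm_device_name(md));
 *	dm_put(md);
 *	return ret;
 */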
1825
Linus Torvalds1da177e2005-04-16 15:20:36 -07001826int dm_suspended(struct mapped_device *md)
1827{
1828 return test_bit(DMF_SUSPENDED, &md->flags);
1829}
1830
Kiyoshi Ueda2e93ccc2006-12-08 02:41:09 -08001831int dm_noflush_suspending(struct dm_target *ti)
1832{
1833 struct mapped_device *md = dm_table_get_md(ti->table);
1834 int r = __noflush_suspending(md);
1835
1836 dm_put(md);
1837
1838 return r;
1839}
1840EXPORT_SYMBOL_GPL(dm_noflush_suspending);
1841
Linus Torvalds1da177e2005-04-16 15:20:36 -07001842static struct block_device_operations dm_blk_dops = {
1843 .open = dm_blk_open,
1844 .release = dm_blk_close,
Milan Brozaa129a22006-10-03 01:15:15 -07001845 .ioctl = dm_blk_ioctl,
Darrick J. Wong3ac51e72006-03-27 01:17:54 -08001846 .getgeo = dm_blk_getgeo,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001847 .owner = THIS_MODULE
1848};
1849
1850EXPORT_SYMBOL(dm_get_mapinfo);
1851
1852/*
1853 * module hooks
1854 */
1855module_init(dm_init);
1856module_exit(dm_exit);
1857
1858module_param(major, uint, 0);
1859MODULE_PARM_DESC(major, "The major number of the device mapper");
1860MODULE_DESCRIPTION(DM_NAME " driver");
1861MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
1862MODULE_LICENSE("GPL");