/*
 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-bio-list.h"
#include "dm-uevent.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/moduleparam.h>
#include <linux/blkpg.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/hdreg.h>
#include <linux/blktrace_api.h>
#include <linux/smp_lock.h>

#define DM_MSG_PREFIX "core"

static const char *_name = DM_NAME;

static unsigned int major = 0;
static unsigned int _major = 0;

static DEFINE_SPINLOCK(_minor_lock);
/*
 * One of these is allocated per bio.
 */
struct dm_io {
	struct mapped_device *md;
	int error;
	atomic_t io_count;
	struct bio *bio;
	unsigned long start_time;
};

/*
 * One of these is allocated per target within a bio.  Hopefully
 * this will be simplified out one day.
 */
struct dm_target_io {
	struct dm_io *io;
	struct dm_target *ti;
	union map_info info;
};

union map_info *dm_get_mapinfo(struct bio *bio)
{
	if (bio && bio->bi_private)
		return &((struct dm_target_io *)bio->bi_private)->info;
	return NULL;
}

#define MINOR_ALLOCED ((void *)-1)

/*
 * Bits for the md->flags field.
 */
#define DMF_BLOCK_IO 0
#define DMF_SUSPENDED 1
#define DMF_FROZEN 2
#define DMF_FREEING 3
#define DMF_DELETING 4
#define DMF_NOFLUSH_SUSPENDING 5

/*
 * Work processed by per-device workqueue.
 */
struct dm_wq_req {
	enum {
		DM_WQ_FLUSH_ALL,
		DM_WQ_FLUSH_DEFERRED,
	} type;
	struct work_struct work;
	struct mapped_device *md;
	void *context;
};

struct mapped_device {
	struct rw_semaphore io_lock;
	struct mutex suspend_lock;
	spinlock_t pushback_lock;
	rwlock_t map_lock;
	atomic_t holders;
	atomic_t open_count;

	unsigned long flags;

	struct request_queue *queue;
	struct gendisk *disk;
	char name[16];

	void *interface_ptr;

	/*
	 * A list of ios that arrived while we were suspended.
	 */
	atomic_t pending;
	wait_queue_head_t wait;
	struct bio_list deferred;
	struct bio_list pushback;

	/*
	 * Processing queue (flush/barriers)
	 */
	struct workqueue_struct *wq;

	/*
	 * The current mapping.
	 */
	struct dm_table *map;

	/*
	 * io objects are allocated from here.
	 */
	mempool_t *io_pool;
	mempool_t *tio_pool;

	struct bio_set *bs;

	/*
	 * Event handling.
	 */
	atomic_t event_nr;
	wait_queue_head_t eventq;
	atomic_t uevent_seq;
	struct list_head uevent_list;
	spinlock_t uevent_lock; /* Protect access to uevent_list */

	/*
	 * freeze/thaw support require holding onto a super block
	 */
	struct super_block *frozen_sb;
	struct block_device *suspended_bdev;

	/* forced geometry settings */
	struct hd_geometry geometry;
};

#define MIN_IOS 256
static struct kmem_cache *_io_cache;
static struct kmem_cache *_tio_cache;

static int __init local_init(void)
{
	int r;

	/* allocate a slab for the dm_ios */
	_io_cache = KMEM_CACHE(dm_io, 0);
	if (!_io_cache)
		return -ENOMEM;

	/* allocate a slab for the target ios */
	_tio_cache = KMEM_CACHE(dm_target_io, 0);
	if (!_tio_cache) {
		kmem_cache_destroy(_io_cache);
		return -ENOMEM;
	}

	r = dm_uevent_init();
	if (r) {
		kmem_cache_destroy(_tio_cache);
		kmem_cache_destroy(_io_cache);
		return r;
	}

	_major = major;
	r = register_blkdev(_major, _name);
	if (r < 0) {
		kmem_cache_destroy(_tio_cache);
		kmem_cache_destroy(_io_cache);
		dm_uevent_exit();
		return r;
	}

	if (!_major)
		_major = r;

	return 0;
}

static void local_exit(void)
{
	kmem_cache_destroy(_tio_cache);
	kmem_cache_destroy(_io_cache);
	unregister_blkdev(_major, _name);
	dm_uevent_exit();

	_major = 0;

	DMINFO("cleaned up");
}

static int (*_inits[])(void) __initdata = {
	local_init,
	dm_target_init,
	dm_linear_init,
	dm_stripe_init,
	dm_kcopyd_init,
	dm_interface_init,
};

static void (*_exits[])(void) = {
	local_exit,
	dm_target_exit,
	dm_linear_exit,
	dm_stripe_exit,
	dm_kcopyd_exit,
	dm_interface_exit,
};

static int __init dm_init(void)
{
	const int count = ARRAY_SIZE(_inits);

	int r, i;

	for (i = 0; i < count; i++) {
		r = _inits[i]();
		if (r)
			goto bad;
	}

	return 0;

      bad:
	while (i--)
		_exits[i]();

	return r;
}

static void __exit dm_exit(void)
{
	int i = ARRAY_SIZE(_exits);

	while (i--)
		_exits[i]();
}

/*
 * Block device functions
 */
static int dm_blk_open(struct inode *inode, struct file *file)
{
	struct mapped_device *md;

	spin_lock(&_minor_lock);

	md = inode->i_bdev->bd_disk->private_data;
	if (!md)
		goto out;

	if (test_bit(DMF_FREEING, &md->flags) ||
	    test_bit(DMF_DELETING, &md->flags)) {
		md = NULL;
		goto out;
	}

	dm_get(md);
	atomic_inc(&md->open_count);

out:
	spin_unlock(&_minor_lock);

	return md ? 0 : -ENXIO;
}

static int dm_blk_close(struct inode *inode, struct file *file)
{
	struct mapped_device *md;

	md = inode->i_bdev->bd_disk->private_data;
	atomic_dec(&md->open_count);
	dm_put(md);
	return 0;
}

int dm_open_count(struct mapped_device *md)
{
	return atomic_read(&md->open_count);
}

/*
 * Guarantees nothing is using the device before it's deleted.
 */
int dm_lock_for_deletion(struct mapped_device *md)
{
	int r = 0;

	spin_lock(&_minor_lock);

	if (dm_open_count(md))
		r = -EBUSY;
	else
		set_bit(DMF_DELETING, &md->flags);

	spin_unlock(&_minor_lock);

	return r;
}

static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
	struct mapped_device *md = bdev->bd_disk->private_data;

	return dm_get_geometry(md, geo);
}

static int dm_blk_ioctl(struct inode *inode, struct file *file,
			unsigned int cmd, unsigned long arg)
{
	struct mapped_device *md;
	struct dm_table *map;
	struct dm_target *tgt;
	int r = -ENOTTY;

	/* We don't really need this lock, but we do need 'inode'. */
	unlock_kernel();

	md = inode->i_bdev->bd_disk->private_data;

	map = dm_get_table(md);

	if (!map || !dm_table_get_size(map))
		goto out;

	/* We only support devices that have a single target */
	if (dm_table_get_num_targets(map) != 1)
		goto out;

	tgt = dm_table_get_target(map, 0);

	if (dm_suspended(md)) {
		r = -EAGAIN;
		goto out;
	}

	if (tgt->type->ioctl)
		r = tgt->type->ioctl(tgt, inode, file, cmd, arg);

out:
	dm_table_put(map);

	lock_kernel();
	return r;
}

static struct dm_io *alloc_io(struct mapped_device *md)
{
	return mempool_alloc(md->io_pool, GFP_NOIO);
}

static void free_io(struct mapped_device *md, struct dm_io *io)
{
	mempool_free(io, md->io_pool);
}

static struct dm_target_io *alloc_tio(struct mapped_device *md)
{
	return mempool_alloc(md->tio_pool, GFP_NOIO);
}

static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
{
	mempool_free(tio, md->tio_pool);
}

static void start_io_acct(struct dm_io *io)
{
	struct mapped_device *md = io->md;
	int cpu;

	io->start_time = jiffies;

	cpu = part_stat_lock();
	part_round_stats(cpu, &dm_disk(md)->part0);
	part_stat_unlock();
	dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending);
}

static int end_io_acct(struct dm_io *io)
{
	struct mapped_device *md = io->md;
	struct bio *bio = io->bio;
	unsigned long duration = jiffies - io->start_time;
	int pending, cpu;
	int rw = bio_data_dir(bio);

	cpu = part_stat_lock();
	part_round_stats(cpu, &dm_disk(md)->part0);
	part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
	part_stat_unlock();

	dm_disk(md)->part0.in_flight = pending =
		atomic_dec_return(&md->pending);

	return !pending;
}

/*
 * Add the bio to the list of deferred io.
 */
static int queue_io(struct mapped_device *md, struct bio *bio)
{
	down_write(&md->io_lock);

	if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
		up_write(&md->io_lock);
		return 1;
	}

	bio_list_add(&md->deferred, bio);

	up_write(&md->io_lock);
	return 0;		/* deferred successfully */
}

/*
 * Everyone (including functions in this file), should use this
 * function to access the md->map field, and make sure they call
 * dm_table_put() when finished.
 */
struct dm_table *dm_get_table(struct mapped_device *md)
{
	struct dm_table *t;

	read_lock(&md->map_lock);
	t = md->map;
	if (t)
		dm_table_get(t);
	read_unlock(&md->map_lock);

	return t;
}

/*
 * Get the geometry associated with a dm device
 */
int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo)
{
	*geo = md->geometry;

	return 0;
}

/*
 * Set the geometry of a device.
 */
int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo)
{
	sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors;

	if (geo->start > sz) {
		DMWARN("Start sector is beyond the geometry limits.");
		return -EINVAL;
	}

	md->geometry = *geo;

	return 0;
}
/*-----------------------------------------------------------------
 * CRUD START:
 *   A more elegant soln is in the works that uses the queue
 *   merge fn, unfortunately there are a couple of changes to
 *   the block layer that I want to make for this.  So in the
 *   interests of getting something for people to use I give
 *   you this clearly demarcated crap.
 *---------------------------------------------------------------*/

static int __noflush_suspending(struct mapped_device *md)
{
	return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
}

/*
 * Decrements the number of outstanding ios that a bio has been
 * cloned into, completing the original io if necc.
 */
static void dec_pending(struct dm_io *io, int error)
{
	unsigned long flags;

	/* Push-back supersedes any I/O errors */
	if (error && !(io->error > 0 && __noflush_suspending(io->md)))
		io->error = error;

	if (atomic_dec_and_test(&io->io_count)) {
		if (io->error == DM_ENDIO_REQUEUE) {
			/*
			 * Target requested pushing back the I/O.
			 * This must be handled before the sleeper on
			 * suspend queue merges the pushback list.
			 */
			spin_lock_irqsave(&io->md->pushback_lock, flags);
			if (__noflush_suspending(io->md))
				bio_list_add(&io->md->pushback, io->bio);
			else
				/* noflush suspend was interrupted. */
				io->error = -EIO;
			spin_unlock_irqrestore(&io->md->pushback_lock, flags);
		}

		if (end_io_acct(io))
			/* nudge anyone waiting on suspend queue */
			wake_up(&io->md->wait);

		if (io->error != DM_ENDIO_REQUEUE) {
			blk_add_trace_bio(io->md->queue, io->bio,
					  BLK_TA_COMPLETE);

			bio_endio(io->bio, io->error);
		}

		free_io(io->md, io);
	}
}

static void clone_endio(struct bio *bio, int error)
{
	int r = 0;
	struct dm_target_io *tio = bio->bi_private;
	struct mapped_device *md = tio->io->md;
	dm_endio_fn endio = tio->ti->type->end_io;

	if (!bio_flagged(bio, BIO_UPTODATE) && !error)
		error = -EIO;

	if (endio) {
		r = endio(tio->ti, bio, error, &tio->info);
		if (r < 0 || r == DM_ENDIO_REQUEUE)
			/*
			 * error and requeue request are handled
			 * in dec_pending().
			 */
			error = r;
		else if (r == DM_ENDIO_INCOMPLETE)
			/* The target will handle the io */
			return;
		else if (r) {
			DMWARN("unimplemented target endio return value: %d", r);
			BUG();
		}
	}

	dec_pending(tio->io, error);

	/*
	 * Store md for cleanup instead of tio which is about to get freed.
	 */
	bio->bi_private = md->bs;

	bio_put(bio);
	free_tio(md, tio);
}

static sector_t max_io_len(struct mapped_device *md,
			   sector_t sector, struct dm_target *ti)
{
	sector_t offset = sector - ti->begin;
	sector_t len = ti->len - offset;

	/*
	 * Does the target need to split even further ?
	 */
	if (ti->split_io) {
		sector_t boundary;
		boundary = ((offset + ti->split_io) & ~(ti->split_io - 1))
			   - offset;
		if (len > boundary)
			len = boundary;
	}

	return len;
}

static void __map_bio(struct dm_target *ti, struct bio *clone,
		      struct dm_target_io *tio)
{
	int r;
	sector_t sector;
	struct mapped_device *md;

	/*
	 * Sanity checks.
	 */
	BUG_ON(!clone->bi_size);

	clone->bi_end_io = clone_endio;
	clone->bi_private = tio;

	/*
	 * Map the clone.  If r == 0 we don't need to do
	 * anything, the target has assumed ownership of
	 * this io.
	 */
	atomic_inc(&tio->io->io_count);
	sector = clone->bi_sector;
	r = ti->type->map(ti, clone, &tio->info);
	if (r == DM_MAPIO_REMAPPED) {
		/* the bio has been remapped so dispatch it */

		blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone,
				    tio->io->bio->bi_bdev->bd_dev,
				    clone->bi_sector, sector);

		generic_make_request(clone);
	} else if (r < 0 || r == DM_MAPIO_REQUEUE) {
		/* error the io and bail out, or requeue it if needed */
		md = tio->io->md;
		dec_pending(tio->io, r);
		/*
		 * Store bio_set for cleanup.
		 */
		clone->bi_private = md->bs;
		bio_put(clone);
		free_tio(md, tio);
	} else if (r) {
		DMWARN("unimplemented target map return value: %d", r);
		BUG();
	}
}
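
/*
 * Recap of the map() return values handled above (added commentary, not
 * original text):
 *
 *	0			- the target took ownership of the clone
 *				  (elsewhere this value is usually spelled
 *				  DM_MAPIO_SUBMITTED)
 *	DM_MAPIO_REMAPPED	- the clone was redirected; dispatch it here
 *	DM_MAPIO_REQUEUE / <0	- drop the reference and record the error
 *				  via dec_pending()
 */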

struct clone_info {
	struct mapped_device *md;
	struct dm_table *map;
	struct bio *bio;
	struct dm_io *io;
	sector_t sector;
	sector_t sector_count;
	unsigned short idx;
};

static void dm_bio_destructor(struct bio *bio)
{
	struct bio_set *bs = bio->bi_private;

	bio_free(bio, bs);
}

/*
 * Creates a little bio that just does part of a bvec.
 */
static struct bio *split_bvec(struct bio *bio, sector_t sector,
			      unsigned short idx, unsigned int offset,
			      unsigned int len, struct bio_set *bs)
{
	struct bio *clone;
	struct bio_vec *bv = bio->bi_io_vec + idx;

	clone = bio_alloc_bioset(GFP_NOIO, 1, bs);
	clone->bi_destructor = dm_bio_destructor;
	*clone->bi_io_vec = *bv;

	clone->bi_sector = sector;
	clone->bi_bdev = bio->bi_bdev;
	clone->bi_rw = bio->bi_rw;
	clone->bi_vcnt = 1;
	clone->bi_size = to_bytes(len);
	clone->bi_io_vec->bv_offset = offset;
	clone->bi_io_vec->bv_len = clone->bi_size;
	clone->bi_flags |= 1 << BIO_CLONED;

	return clone;
}

/*
 * Creates a bio that consists of range of complete bvecs.
 */
static struct bio *clone_bio(struct bio *bio, sector_t sector,
			     unsigned short idx, unsigned short bv_count,
			     unsigned int len, struct bio_set *bs)
{
	struct bio *clone;

	clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
	__bio_clone(clone, bio);
	clone->bi_destructor = dm_bio_destructor;
	clone->bi_sector = sector;
	clone->bi_idx = idx;
	clone->bi_vcnt = idx + bv_count;
	clone->bi_size = to_bytes(len);
	clone->bi_flags &= ~(1 << BIO_SEG_VALID);

	return clone;
}

static int __clone_and_map(struct clone_info *ci)
{
	struct bio *clone, *bio = ci->bio;
	struct dm_target *ti;
	sector_t len = 0, max;
	struct dm_target_io *tio;

	ti = dm_table_find_target(ci->map, ci->sector);
	if (!dm_target_is_valid(ti))
		return -EIO;

	max = max_io_len(ci->md, ci->sector, ti);

	/*
	 * Allocate a target io object.
	 */
	tio = alloc_tio(ci->md);
	tio->io = ci->io;
	tio->ti = ti;
	memset(&tio->info, 0, sizeof(tio->info));

	if (ci->sector_count <= max) {
		/*
		 * Optimise for the simple case where we can do all of
		 * the remaining io with a single clone.
		 */
		clone = clone_bio(bio, ci->sector, ci->idx,
				  bio->bi_vcnt - ci->idx, ci->sector_count,
				  ci->md->bs);
		__map_bio(ti, clone, tio);
		ci->sector_count = 0;

	} else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) {
		/*
		 * There are some bvecs that don't span targets.
		 * Do as many of these as possible.
		 */
		int i;
		sector_t remaining = max;
		sector_t bv_len;

		for (i = ci->idx; remaining && (i < bio->bi_vcnt); i++) {
			bv_len = to_sector(bio->bi_io_vec[i].bv_len);

			if (bv_len > remaining)
				break;

			remaining -= bv_len;
			len += bv_len;
		}

		clone = clone_bio(bio, ci->sector, ci->idx, i - ci->idx, len,
				  ci->md->bs);
		__map_bio(ti, clone, tio);

		ci->sector += len;
		ci->sector_count -= len;
		ci->idx = i;

	} else {
		/*
		 * Handle a bvec that must be split between two or more targets.
		 */
		struct bio_vec *bv = bio->bi_io_vec + ci->idx;
		sector_t remaining = to_sector(bv->bv_len);
		unsigned int offset = 0;

		do {
			if (offset) {
				ti = dm_table_find_target(ci->map, ci->sector);
				if (!dm_target_is_valid(ti))
					return -EIO;

				max = max_io_len(ci->md, ci->sector, ti);

				tio = alloc_tio(ci->md);
				tio->io = ci->io;
				tio->ti = ti;
				memset(&tio->info, 0, sizeof(tio->info));
			}

			len = min(remaining, max);

			clone = split_bvec(bio, ci->sector, ci->idx,
					   bv->bv_offset + offset, len,
					   ci->md->bs);

			__map_bio(ti, clone, tio);

			ci->sector += len;
			ci->sector_count -= len;
			offset += to_bytes(len);
		} while (remaining -= len);

		ci->idx++;
	}

	return 0;
}
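
/*
 * Recap of the three cases above (added commentary, not original text):
 * either the whole remainder fits in the current target (one clone of the
 * rest of the bio), or whole bvecs are cloned up to the target boundary,
 * or a single bvec straddles targets and is carved up with split_bvec()
 * until it is exhausted.
 */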

/*
 * Split the bio into several clones.
 */
static int __split_bio(struct mapped_device *md, struct bio *bio)
{
	struct clone_info ci;
	int error = 0;

	ci.map = dm_get_table(md);
	if (unlikely(!ci.map))
		return -EIO;

	ci.md = md;
	ci.bio = bio;
	ci.io = alloc_io(md);
	ci.io->error = 0;
	atomic_set(&ci.io->io_count, 1);
	ci.io->bio = bio;
	ci.io->md = md;
	ci.sector = bio->bi_sector;
	ci.sector_count = bio_sectors(bio);
	ci.idx = bio->bi_idx;

	start_io_acct(ci.io);
	while (ci.sector_count && !error)
		error = __clone_and_map(&ci);

	/* drop the extra reference count */
	dec_pending(ci.io, error);
	dm_table_put(ci.map);

	return 0;
}
/*-----------------------------------------------------------------
 * CRUD END
 *---------------------------------------------------------------*/

static int dm_merge_bvec(struct request_queue *q,
			 struct bvec_merge_data *bvm,
			 struct bio_vec *biovec)
{
	struct mapped_device *md = q->queuedata;
	struct dm_table *map = dm_get_table(md);
	struct dm_target *ti;
	sector_t max_sectors;
	int max_size = 0;

	if (unlikely(!map))
		goto out;

	ti = dm_table_find_target(map, bvm->bi_sector);
	if (!dm_target_is_valid(ti))
		goto out_table;

	/*
	 * Find maximum amount of I/O that won't need splitting
	 */
	max_sectors = min(max_io_len(md, bvm->bi_sector, ti),
			  (sector_t) BIO_MAX_SECTORS);
	max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
	if (max_size < 0)
		max_size = 0;

	/*
	 * merge_bvec_fn() returns number of bytes
	 * it can accept at this offset
	 * max is precomputed maximal io size
	 */
	if (max_size && ti->type->merge)
		max_size = ti->type->merge(ti, bvm, biovec, max_size);

out_table:
	dm_table_put(map);

out:
	/*
	 * Always allow an entire first page
	 */
	if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT))
		max_size = biovec->bv_len;

	return max_size;
}
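
/*
 * Illustrative calculation (added commentary, not original text): if the
 * target allows 8 more sectors at bvm->bi_sector and the bio already
 * holds bvm->bi_size = 2048 bytes, max_size is (8 << SECTOR_SHIFT) - 2048
 * = 2048 bytes, i.e. the block layer may add at most 2 KiB more to this
 * bio before dm_request() would have to split it.
 */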

/*
 * The request function that just remaps the bio built up by
 * dm_merge_bvec.
 */
static int dm_request(struct request_queue *q, struct bio *bio)
{
	int r = -EIO;
	int rw = bio_data_dir(bio);
	struct mapped_device *md = q->queuedata;
	int cpu;

	/*
	 * There is no use in forwarding any barrier request since we can't
	 * guarantee it is (or can be) handled by the targets correctly.
	 */
	if (unlikely(bio_barrier(bio))) {
		bio_endio(bio, -EOPNOTSUPP);
		return 0;
	}

	down_read(&md->io_lock);

	cpu = part_stat_lock();
	part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]);
	part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio));
	part_stat_unlock();

	/*
	 * If we're suspended we have to queue
	 * this io for later.
	 */
	while (test_bit(DMF_BLOCK_IO, &md->flags)) {
		up_read(&md->io_lock);

		if (bio_rw(bio) != READA)
			r = queue_io(md, bio);

		if (r <= 0)
			goto out_req;

		/*
		 * We're in a while loop, because someone could suspend
		 * before we get to the following read lock.
		 */
		down_read(&md->io_lock);
	}

	r = __split_bio(md, bio);
	up_read(&md->io_lock);

out_req:
	if (r < 0)
		bio_io_error(bio);

	return 0;
}

static void dm_unplug_all(struct request_queue *q)
{
	struct mapped_device *md = q->queuedata;
	struct dm_table *map = dm_get_table(md);

	if (map) {
		dm_table_unplug_all(map);
		dm_table_put(map);
	}
}

static int dm_any_congested(void *congested_data, int bdi_bits)
{
	int r;
	struct mapped_device *md = (struct mapped_device *) congested_data;
	struct dm_table *map = dm_get_table(md);

	if (!map || test_bit(DMF_BLOCK_IO, &md->flags))
		r = bdi_bits;
	else
		r = dm_table_any_congested(map, bdi_bits);

	dm_table_put(map);
	return r;
}

/*-----------------------------------------------------------------
 * An IDR is used to keep track of allocated minor numbers.
 *---------------------------------------------------------------*/
static DEFINE_IDR(_minor_idr);

static void free_minor(int minor)
{
	spin_lock(&_minor_lock);
	idr_remove(&_minor_idr, minor);
	spin_unlock(&_minor_lock);
}

/*
 * See if the device with a specific minor # is free.
 */
static int specific_minor(int minor)
{
	int r, m;

	if (minor >= (1 << MINORBITS))
		return -EINVAL;

	r = idr_pre_get(&_minor_idr, GFP_KERNEL);
	if (!r)
		return -ENOMEM;

	spin_lock(&_minor_lock);

	if (idr_find(&_minor_idr, minor)) {
		r = -EBUSY;
		goto out;
	}

	r = idr_get_new_above(&_minor_idr, MINOR_ALLOCED, minor, &m);
	if (r)
		goto out;

	if (m != minor) {
		idr_remove(&_minor_idr, m);
		r = -EBUSY;
		goto out;
	}

out:
	spin_unlock(&_minor_lock);
	return r;
}

static int next_free_minor(int *minor)
{
	int r, m;

	r = idr_pre_get(&_minor_idr, GFP_KERNEL);
	if (!r)
		return -ENOMEM;

	spin_lock(&_minor_lock);

	r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m);
	if (r)
		goto out;

	if (m >= (1 << MINORBITS)) {
		idr_remove(&_minor_idr, m);
		r = -ENOSPC;
		goto out;
	}

	*minor = m;

out:
	spin_unlock(&_minor_lock);
	return r;
}

static struct block_device_operations dm_blk_dops;

/*
 * Allocate and initialise a blank device with a given minor.
 */
static struct mapped_device *alloc_dev(int minor)
{
	int r;
	struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL);
	void *old_md;

	if (!md) {
		DMWARN("unable to allocate device, out of memory.");
		return NULL;
	}

	if (!try_module_get(THIS_MODULE))
		goto bad_module_get;

	/* get a minor number for the dev */
	if (minor == DM_ANY_MINOR)
		r = next_free_minor(&minor);
	else
		r = specific_minor(minor);
	if (r < 0)
		goto bad_minor;

	init_rwsem(&md->io_lock);
	mutex_init(&md->suspend_lock);
	spin_lock_init(&md->pushback_lock);
	rwlock_init(&md->map_lock);
	atomic_set(&md->holders, 1);
	atomic_set(&md->open_count, 0);
	atomic_set(&md->event_nr, 0);
	atomic_set(&md->uevent_seq, 0);
	INIT_LIST_HEAD(&md->uevent_list);
	spin_lock_init(&md->uevent_lock);

	md->queue = blk_alloc_queue(GFP_KERNEL);
	if (!md->queue)
		goto bad_queue;

	md->queue->queuedata = md;
	md->queue->backing_dev_info.congested_fn = dm_any_congested;
	md->queue->backing_dev_info.congested_data = md;
	blk_queue_make_request(md->queue, dm_request);
	blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
	md->queue->unplug_fn = dm_unplug_all;
	blk_queue_merge_bvec(md->queue, dm_merge_bvec);

	md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache);
	if (!md->io_pool)
		goto bad_io_pool;

	md->tio_pool = mempool_create_slab_pool(MIN_IOS, _tio_cache);
	if (!md->tio_pool)
		goto bad_tio_pool;

	md->bs = bioset_create(16, 16);
	if (!md->bs)
		goto bad_no_bioset;

	md->disk = alloc_disk(1);
	if (!md->disk)
		goto bad_disk;

	atomic_set(&md->pending, 0);
	init_waitqueue_head(&md->wait);
	init_waitqueue_head(&md->eventq);

	md->disk->major = _major;
	md->disk->first_minor = minor;
	md->disk->fops = &dm_blk_dops;
	md->disk->queue = md->queue;
	md->disk->private_data = md;
	sprintf(md->disk->disk_name, "dm-%d", minor);
	add_disk(md->disk);
	format_dev_t(md->name, MKDEV(_major, minor));

	md->wq = create_singlethread_workqueue("kdmflush");
	if (!md->wq)
		goto bad_thread;

	/* Populate the mapping, nobody knows we exist yet */
	spin_lock(&_minor_lock);
	old_md = idr_replace(&_minor_idr, md, minor);
	spin_unlock(&_minor_lock);

	BUG_ON(old_md != MINOR_ALLOCED);

	return md;

bad_thread:
	put_disk(md->disk);
bad_disk:
	bioset_free(md->bs);
bad_no_bioset:
	mempool_destroy(md->tio_pool);
bad_tio_pool:
	mempool_destroy(md->io_pool);
bad_io_pool:
	blk_cleanup_queue(md->queue);
bad_queue:
	free_minor(minor);
bad_minor:
	module_put(THIS_MODULE);
bad_module_get:
	kfree(md);
	return NULL;
}

static void unlock_fs(struct mapped_device *md);

static void free_dev(struct mapped_device *md)
{
	int minor = MINOR(disk_devt(md->disk));

	if (md->suspended_bdev) {
		unlock_fs(md);
		bdput(md->suspended_bdev);
	}
	destroy_workqueue(md->wq);
	mempool_destroy(md->tio_pool);
	mempool_destroy(md->io_pool);
	bioset_free(md->bs);
	del_gendisk(md->disk);
	free_minor(minor);

	spin_lock(&_minor_lock);
	md->disk->private_data = NULL;
	spin_unlock(&_minor_lock);

	put_disk(md->disk);
	blk_cleanup_queue(md->queue);
	module_put(THIS_MODULE);
	kfree(md);
}

/*
 * Bind a table to the device.
 */
static void event_callback(void *context)
{
	unsigned long flags;
	LIST_HEAD(uevents);
	struct mapped_device *md = (struct mapped_device *) context;

	spin_lock_irqsave(&md->uevent_lock, flags);
	list_splice_init(&md->uevent_list, &uevents);
	spin_unlock_irqrestore(&md->uevent_lock, flags);

	dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);

	atomic_inc(&md->event_nr);
	wake_up(&md->eventq);
}

static void __set_size(struct mapped_device *md, sector_t size)
{
	set_capacity(md->disk, size);

	mutex_lock(&md->suspended_bdev->bd_inode->i_mutex);
	i_size_write(md->suspended_bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
	mutex_unlock(&md->suspended_bdev->bd_inode->i_mutex);
}

static int __bind(struct mapped_device *md, struct dm_table *t)
{
	struct request_queue *q = md->queue;
	sector_t size;

	size = dm_table_get_size(t);

	/*
	 * Wipe any geometry if the size of the table changed.
	 */
	if (size != get_capacity(md->disk))
		memset(&md->geometry, 0, sizeof(md->geometry));

	if (md->suspended_bdev)
		__set_size(md, size);
	if (size == 0)
		return 0;

	dm_table_get(t);
	dm_table_event_callback(t, event_callback, md);

	write_lock(&md->map_lock);
	md->map = t;
	dm_table_set_restrictions(t, q);
	write_unlock(&md->map_lock);

	return 0;
}

static void __unbind(struct mapped_device *md)
{
	struct dm_table *map = md->map;

	if (!map)
		return;

	dm_table_event_callback(map, NULL, NULL);
	write_lock(&md->map_lock);
	md->map = NULL;
	write_unlock(&md->map_lock);
	dm_table_put(map);
}

/*
 * Constructor for a new device.
 */
int dm_create(int minor, struct mapped_device **result)
{
	struct mapped_device *md;

	md = alloc_dev(minor);
	if (!md)
		return -ENXIO;

	*result = md;
	return 0;
}

static struct mapped_device *dm_find_md(dev_t dev)
{
	struct mapped_device *md;
	unsigned minor = MINOR(dev);

	if (MAJOR(dev) != _major || minor >= (1 << MINORBITS))
		return NULL;

	spin_lock(&_minor_lock);

	md = idr_find(&_minor_idr, minor);
	if (md && (md == MINOR_ALLOCED ||
		   (MINOR(disk_devt(dm_disk(md))) != minor) ||
		   test_bit(DMF_FREEING, &md->flags))) {
		md = NULL;
		goto out;
	}

out:
	spin_unlock(&_minor_lock);

	return md;
}

struct mapped_device *dm_get_md(dev_t dev)
{
	struct mapped_device *md = dm_find_md(dev);

	if (md)
		dm_get(md);

	return md;
}

void *dm_get_mdptr(struct mapped_device *md)
{
	return md->interface_ptr;
}

void dm_set_mdptr(struct mapped_device *md, void *ptr)
{
	md->interface_ptr = ptr;
}

void dm_get(struct mapped_device *md)
{
	atomic_inc(&md->holders);
}

const char *dm_device_name(struct mapped_device *md)
{
	return md->name;
}
EXPORT_SYMBOL_GPL(dm_device_name);

void dm_put(struct mapped_device *md)
{
	struct dm_table *map;

	BUG_ON(test_bit(DMF_FREEING, &md->flags));

	if (atomic_dec_and_lock(&md->holders, &_minor_lock)) {
		map = dm_get_table(md);
		idr_replace(&_minor_idr, MINOR_ALLOCED,
			    MINOR(disk_devt(dm_disk(md))));
		set_bit(DMF_FREEING, &md->flags);
		spin_unlock(&_minor_lock);
		if (!dm_suspended(md)) {
			dm_table_presuspend_targets(map);
			dm_table_postsuspend_targets(map);
		}
		__unbind(md);
		dm_table_put(map);
		free_dev(md);
	}
}
EXPORT_SYMBOL_GPL(dm_put);

static int dm_wait_for_completion(struct mapped_device *md)
{
	int r = 0;

	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);

		smp_mb();
		if (!atomic_read(&md->pending))
			break;

		if (signal_pending(current)) {
			r = -EINTR;
			break;
		}

		io_schedule();
	}
	set_current_state(TASK_RUNNING);

	return r;
}

/*
 * Process the deferred bios
 */
static void __flush_deferred_io(struct mapped_device *md)
{
	struct bio *c;

	while ((c = bio_list_pop(&md->deferred))) {
		if (__split_bio(md, c))
			bio_io_error(c);
	}

	clear_bit(DMF_BLOCK_IO, &md->flags);
}

static void __merge_pushback_list(struct mapped_device *md)
{
	unsigned long flags;

	spin_lock_irqsave(&md->pushback_lock, flags);
	clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
	bio_list_merge_head(&md->deferred, &md->pushback);
	bio_list_init(&md->pushback);
	spin_unlock_irqrestore(&md->pushback_lock, flags);
}

static void dm_wq_work(struct work_struct *work)
{
	struct dm_wq_req *req = container_of(work, struct dm_wq_req, work);
	struct mapped_device *md = req->md;

	down_write(&md->io_lock);
	switch (req->type) {
	case DM_WQ_FLUSH_ALL:
		__merge_pushback_list(md);
		/* pass through */
	case DM_WQ_FLUSH_DEFERRED:
		__flush_deferred_io(md);
		break;
	default:
		DMERR("dm_wq_work: unrecognised work type %d", req->type);
		BUG();
	}
	up_write(&md->io_lock);
}

static void dm_wq_queue(struct mapped_device *md, int type, void *context,
			struct dm_wq_req *req)
{
	req->type = type;
	req->md = md;
	req->context = context;
	INIT_WORK(&req->work, dm_wq_work);
	queue_work(md->wq, &req->work);
}

static void dm_queue_flush(struct mapped_device *md, int type, void *context)
{
	struct dm_wq_req req;

	dm_wq_queue(md, type, context, &req);
	flush_workqueue(md->wq);
}
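
/*
 * Usage sketch (added commentary, not original text): a caller issues,
 * for example, dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL). The
 * request is stack allocated, which is safe because flush_workqueue()
 * above does not return until dm_wq_work() has run and the deferred
 * bios have been drained.
 */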
1428
Linus Torvalds1da177e2005-04-16 15:20:36 -07001429/*
1430 * Swap in a new table (destroying old one).
1431 */
1432int dm_swap_table(struct mapped_device *md, struct dm_table *table)
1433{
Alasdair G Kergon93c534a2005-07-12 15:53:05 -07001434 int r = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001435
Daniel Walkere61290a2008-02-08 02:10:08 +00001436 mutex_lock(&md->suspend_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001437
1438 /* device must be suspended */
Alasdair G Kergoncf222b32005-07-28 21:15:57 -07001439 if (!dm_suspended(md))
Alasdair G Kergon93c534a2005-07-12 15:53:05 -07001440 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001441
Jun'ichi Nomurabfa152f2007-01-26 00:57:07 -08001442 /* without bdev, the device size cannot be changed */
1443 if (!md->suspended_bdev)
1444 if (get_capacity(md->disk) != dm_table_get_size(table))
1445 goto out;
1446
Linus Torvalds1da177e2005-04-16 15:20:36 -07001447 __unbind(md);
1448 r = __bind(md, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001449
Alasdair G Kergon93c534a2005-07-12 15:53:05 -07001450out:
Daniel Walkere61290a2008-02-08 02:10:08 +00001451 mutex_unlock(&md->suspend_lock);
Alasdair G Kergon93c534a2005-07-12 15:53:05 -07001452 return r;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001453}

/*
 * Functions to lock and unlock any filesystem running on the
 * device.
 */
static int lock_fs(struct mapped_device *md)
{
	int r;

	WARN_ON(md->frozen_sb);

	md->frozen_sb = freeze_bdev(md->suspended_bdev);
	if (IS_ERR(md->frozen_sb)) {
		r = PTR_ERR(md->frozen_sb);
		md->frozen_sb = NULL;
		return r;
	}

	set_bit(DMF_FROZEN, &md->flags);

	/* don't bdput right now; we don't want the bdev
	 * to go away while it is locked.
	 */
	return 0;
}

static void unlock_fs(struct mapped_device *md)
{
	if (!test_bit(DMF_FROZEN, &md->flags))
		return;

	thaw_bdev(md->suspended_bdev, md->frozen_sb);
	md->frozen_sb = NULL;
	clear_bit(DMF_FROZEN, &md->flags);
}

/*
 * We need to be able to change a mapping table under a mounted
 * filesystem. For example, we might want to move some data in
 * the background. Before the table can be swapped with
 * dm_bind_table, dm_suspend must be called to flush any in-flight
 * bios and ensure that any further I/O gets deferred.
 */
int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
{
	struct dm_table *map = NULL;
	DECLARE_WAITQUEUE(wait, current);
	int r = 0;
	int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
	int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;

	mutex_lock(&md->suspend_lock);

	if (dm_suspended(md)) {
		r = -EINVAL;
		goto out_unlock;
	}

	map = dm_get_table(md);

	/*
	 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
	 * This flag is cleared before dm_suspend returns.
	 */
	if (noflush)
		set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);

	/* This does not get reverted if there's an error later. */
	dm_table_presuspend_targets(map);

	/* bdget() can stall if the pending I/Os are not flushed */
	if (!noflush) {
		md->suspended_bdev = bdget_disk(md->disk, 0);
		if (!md->suspended_bdev) {
			DMWARN("bdget failed in dm_suspend");
			r = -ENOMEM;
			goto flush_and_out;
		}

		/*
		 * Flush I/O to the device. noflush supersedes do_lockfs,
		 * because lock_fs() needs to flush I/Os.
		 */
		if (do_lockfs) {
			r = lock_fs(md);
			if (r)
				goto out;
		}
	}

	/*
	 * First we set the BLOCK_IO flag so no more ios will be mapped.
	 */
	down_write(&md->io_lock);
	set_bit(DMF_BLOCK_IO, &md->flags);

	add_wait_queue(&md->wait, &wait);
	up_write(&md->io_lock);

	/* unplug */
	if (map)
		dm_table_unplug_all(map);

	/*
	 * Wait for the already-mapped ios to complete.
	 */
	r = dm_wait_for_completion(md);

	down_write(&md->io_lock);
	remove_wait_queue(&md->wait, &wait);

	if (noflush)
		__merge_pushback_list(md);
	up_write(&md->io_lock);

	/* were we interrupted? */
	if (r < 0) {
		dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);

		unlock_fs(md);
		goto out; /* pushback list is already flushed, so skip flush */
	}

	dm_table_postsuspend_targets(map);

	set_bit(DMF_SUSPENDED, &md->flags);

flush_and_out:
	if (r && noflush)
		/*
		 * Because there may already be I/Os in the pushback list,
		 * flush them before returning.
		 */
		dm_queue_flush(md, DM_WQ_FLUSH_ALL, NULL);

out:
	if (r && md->suspended_bdev) {
		bdput(md->suspended_bdev);
		md->suspended_bdev = NULL;
	}

	dm_table_put(map);

out_unlock:
	mutex_unlock(&md->suspend_lock);
	return r;
}

int dm_resume(struct mapped_device *md)
{
	int r = -EINVAL;
	struct dm_table *map = NULL;

	mutex_lock(&md->suspend_lock);
	if (!dm_suspended(md))
		goto out;

	map = dm_get_table(md);
	if (!map || !dm_table_get_size(map))
		goto out;

	r = dm_table_resume_targets(map);
	if (r)
		goto out;

	dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);

	unlock_fs(md);

	if (md->suspended_bdev) {
		bdput(md->suspended_bdev);
		md->suspended_bdev = NULL;
	}

	clear_bit(DMF_SUSPENDED, &md->flags);

	dm_table_unplug_all(map);

	dm_kobject_uevent(md);

	r = 0;

out:
	dm_table_put(map);
	mutex_unlock(&md->suspend_lock);

	return r;
}
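
/*
 * Illustrative sketch only, not part of this driver: the suspend /
 * swap / resume sequence that a caller (normally the ioctl layer) is
 * expected to follow when replacing a device's table, as described in
 * the comment above dm_suspend().  The function name and the error
 * handling here are hypothetical.
 */
static int example_replace_table(struct mapped_device *md,
				 struct dm_table *new_table)
{
	int r;

	/* Quiesce the device: in-flight bios complete, new I/O is deferred. */
	r = dm_suspend(md, DM_SUSPEND_LOCKFS_FLAG);
	if (r)
		return r;

	/* The table can only be swapped while the device is suspended. */
	r = dm_swap_table(md, new_table);
	if (r) {
		dm_resume(md);		/* resume with the old table */
		return r;
	}

	/* Restart I/O; deferred bios are now remapped via the new table. */
	return dm_resume(md);
}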

/*-----------------------------------------------------------------
 * Event notification.
 *---------------------------------------------------------------*/
void dm_kobject_uevent(struct mapped_device *md)
{
	kobject_uevent(&disk_to_dev(md->disk)->kobj, KOBJ_CHANGE);
}

uint32_t dm_next_uevent_seq(struct mapped_device *md)
{
	return atomic_add_return(1, &md->uevent_seq);
}

uint32_t dm_get_event_nr(struct mapped_device *md)
{
	return atomic_read(&md->event_nr);
}

int dm_wait_event(struct mapped_device *md, int event_nr)
{
	return wait_event_interruptible(md->eventq,
			(event_nr != atomic_read(&md->event_nr)));
}
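
/*
 * Illustrative sketch only, not part of this driver: how a caller might
 * use the two helpers above to wait for the next event without racing
 * against events raised in between.  The function name is hypothetical.
 */
static int example_wait_for_next_event(struct mapped_device *md)
{
	uint32_t event_nr = dm_get_event_nr(md);

	/*
	 * Sample the counter first, then sleep until it changes; an event
	 * raised between the two calls makes dm_wait_event() return
	 * immediately.  Returns -ERESTARTSYS if interrupted by a signal.
	 */
	return dm_wait_event(md, event_nr);
}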

void dm_uevent_add(struct mapped_device *md, struct list_head *elist)
{
	unsigned long flags;

	spin_lock_irqsave(&md->uevent_lock, flags);
	list_add(elist, &md->uevent_list);
	spin_unlock_irqrestore(&md->uevent_lock, flags);
}

/*
 * The gendisk is only valid as long as you have a reference
 * count on 'md'.
 */
struct gendisk *dm_disk(struct mapped_device *md)
{
	return md->disk;
}
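
/*
 * Illustrative sketch only, not part of this driver: per the comment
 * above, a gendisk obtained from dm_disk() may only be used while the
 * caller holds a reference on the mapped_device, e.g. by bracketing the
 * access with dm_get()/dm_put().  The function name is hypothetical.
 */
static void example_log_disk_name(struct mapped_device *md)
{
	dm_get(md);			/* pin md so the disk cannot go away */
	DMINFO("device is backed by %s", dm_disk(md)->disk_name);
	dm_put(md);
}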

int dm_suspended(struct mapped_device *md)
{
	return test_bit(DMF_SUSPENDED, &md->flags);
}

int dm_noflush_suspending(struct dm_target *ti)
{
	struct mapped_device *md = dm_table_get_md(ti->table);
	int r = __noflush_suspending(md);

	dm_put(md);

	return r;
}
EXPORT_SYMBOL_GPL(dm_noflush_suspending);
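
/*
 * Illustrative sketch only, not part of this driver: a bio-based target
 * (dm-multipath does something similar) can consult
 * dm_noflush_suspending() in its end_io hook to requeue, rather than
 * fail, I/O that errors while a noflush suspend is in progress.  The
 * function name and the error handling are hypothetical.
 */
static int example_target_end_io(struct dm_target *ti, struct bio *bio,
				 int error, union map_info *map_context)
{
	if (error && dm_noflush_suspending(ti))
		return DM_ENDIO_REQUEUE;	/* retry after the device is resumed */

	return error;
}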

static struct block_device_operations dm_blk_dops = {
	.open = dm_blk_open,
	.release = dm_blk_close,
	.ioctl = dm_blk_ioctl,
	.getgeo = dm_blk_getgeo,
	.owner = THIS_MODULE
};

EXPORT_SYMBOL(dm_get_mapinfo);

/*
 * module hooks
 */
module_init(dm_init);
module_exit(dm_exit);

module_param(major, uint, 0);
MODULE_PARM_DESC(major, "The major number of the device mapper");
MODULE_DESCRIPTION(DM_NAME " driver");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");