/*
 * Copyright (C) 2003 Sistina Software
 * Copyright (C) 2006 Red Hat GmbH
 *
 * This file is released under the GPL.
 */

#include "dm-io.h"

#include <linux/bio.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>

static struct bio_set *_bios;

struct dm_io_client {
	mempool_t *pool;
	struct bio_set *bios;
};

/* FIXME: can we shrink this ? */
struct io {
	unsigned long error;
	atomic_t count;
	struct task_struct *sleeper;
	struct dm_io_client *client;
	io_notify_fn callback;
	void *context;
};

/*
 * io contexts are only dynamically allocated for asynchronous
 * io. Since async io is likely to be the majority of io we'll
 * have the same number of io contexts as bios! (FIXME: must reduce this).
 */
static unsigned _num_ios;
static mempool_t *_io_pool;

/*
 * Temporary functions to allow old and new interfaces to co-exist.
 */
static struct bio_set *bios(struct dm_io_client *client)
{
	return client ? client->bios : _bios;
}

static mempool_t *io_pool(struct dm_io_client *client)
{
	return client ? client->pool : _io_pool;
}

static unsigned int pages_to_ios(unsigned int pages)
{
	return 4 * pages;	/* too many ? */
}

static int resize_pool(unsigned int new_ios)
{
	int r = 0;

	if (_io_pool) {
		if (new_ios == 0) {
			/* free off the pool */
			mempool_destroy(_io_pool);
			_io_pool = NULL;
			bioset_free(_bios);

		} else {
			/* resize the pool */
			r = mempool_resize(_io_pool, new_ios, GFP_KERNEL);
		}

	} else {
		/* create new pool */
		_io_pool = mempool_create_kmalloc_pool(new_ios,
						       sizeof(struct io));
		if (!_io_pool)
			return -ENOMEM;

		_bios = bioset_create(16, 16);
		if (!_bios) {
			mempool_destroy(_io_pool);
			_io_pool = NULL;
			return -ENOMEM;
		}
	}

	if (!r)
		_num_ios = new_ios;

	return r;
}

int dm_io_get(unsigned int num_pages)
{
	return resize_pool(_num_ios + pages_to_ios(num_pages));
}

void dm_io_put(unsigned int num_pages)
{
	resize_pool(_num_ios - pages_to_ios(num_pages));
}
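
/*
 * Usage sketch for the old reservation interface (hypothetical caller,
 * names illustrative): reserve io structs for a 16-page transfer up
 * front and release them afterwards.  dm_io_get() grows the global
 * pool and may fail with -ENOMEM; dm_io_put() shrinks it again.
 *
 *	if (dm_io_get(16))
 *		return -ENOMEM;
 *	... issue dm_io_sync() / dm_io_async() calls ...
 *	dm_io_put(16);
 */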

/*-----------------------------------------------------------------
 * We need to keep track of which region a bio is doing io for.
 * In order to save a memory allocation we store this in the last
 * bvec which we know is unused (blech).
 * XXX This is ugly and can OOPS with some configs... find another way.
 *---------------------------------------------------------------*/
static inline void bio_set_region(struct bio *bio, unsigned region)
{
	bio->bi_io_vec[bio->bi_max_vecs].bv_len = region;
}

static inline unsigned bio_get_region(struct bio *bio)
{
	return bio->bi_io_vec[bio->bi_max_vecs].bv_len;
}
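
/*
 * Lifecycle of the hidden bvec (a sketch of what do_region() and
 * endio() below actually do): the bio is allocated with one bvec more
 * than bio_add_page() is allowed to use, bi_max_vecs is decremented to
 * hide it, and the region number is stashed in that spare bvec's
 * bv_len:
 *
 *	bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, ...);
 *	bio->bi_max_vecs--;
 *	bio_set_region(bio, region);
 *	...
 *	region = bio_get_region(bio);	(in endio())
 *	bio->bi_max_vecs++;		(restore before bio_put())
 *	bio_put(bio);
 */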

/*-----------------------------------------------------------------
 * We need an io object to keep track of the number of bios that
 * have been dispatched for a particular io.
 *---------------------------------------------------------------*/
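/*
 * io->error doubles as a bitmask: dec_count() sets bit 'region' when
 * that region's io fails, and the accumulated mask is reported back
 * either through *error_bits (sync) or as the first argument of the
 * notify callback (async).
 */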
static void dec_count(struct io *io, unsigned int region, int error)
{
	if (error)
		set_bit(region, &io->error);

	if (atomic_dec_and_test(&io->count)) {
		if (io->sleeper)
			wake_up_process(io->sleeper);

		else {
			int r = io->error;
			io_notify_fn fn = io->callback;
			void *context = io->context;

			mempool_free(io, io_pool(io->client));
			fn(r, context);
		}
	}
}

static int endio(struct bio *bio, unsigned int done, int error)
{
	struct io *io;
	unsigned region;

	/* keep going until we've finished */
	if (bio->bi_size)
		return 1;

	if (error && bio_data_dir(bio) == READ)
		zero_fill_bio(bio);

	/*
	 * The bio destructor in bio_put() may use the io object.
	 */
	io = bio->bi_private;
	region = bio_get_region(bio);

	bio->bi_max_vecs++;
	bio_put(bio);

	dec_count(io, region, error);

	return 0;
}

/*-----------------------------------------------------------------
 * These little objects provide an abstraction for getting a new
 * destination page for io.
 *---------------------------------------------------------------*/
struct dpages {
	void (*get_page)(struct dpages *dp,
			 struct page **p, unsigned long *len, unsigned *offset);
	void (*next_page)(struct dpages *dp);

	unsigned context_u;
	void *context_ptr;
};
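
/*
 * A dpages object is a small forward-only iterator: get_page() reports
 * the current page, the usable length from the current offset, and the
 * offset itself; next_page() advances to the following page.
 * do_region() below drains it roughly like this (sketch of its inner
 * loop, not a separate API):
 *
 *	dp->get_page(dp, &page, &len, &offset);
 *	len = min(len, to_bytes(remaining));
 *	if (!bio_add_page(bio, page, len, offset))
 *		break;
 *	offset = 0;
 *	remaining -= to_sector(len);
 *	dp->next_page(dp);
 */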

/*
 * Functions for getting the pages from a list.
 */
static void list_get_page(struct dpages *dp,
		  struct page **p, unsigned long *len, unsigned *offset)
{
	unsigned o = dp->context_u;
	struct page_list *pl = (struct page_list *) dp->context_ptr;

	*p = pl->page;
	*len = PAGE_SIZE - o;
	*offset = o;
}

static void list_next_page(struct dpages *dp)
{
	struct page_list *pl = (struct page_list *) dp->context_ptr;
	dp->context_ptr = pl->next;
	dp->context_u = 0;
}

static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offset)
{
	dp->get_page = list_get_page;
	dp->next_page = list_next_page;
	dp->context_u = offset;
	dp->context_ptr = pl;
}

/*
 * Functions for getting the pages from a bvec.
 */
static void bvec_get_page(struct dpages *dp,
		  struct page **p, unsigned long *len, unsigned *offset)
{
	struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr;
	*p = bvec->bv_page;
	*len = bvec->bv_len;
	*offset = bvec->bv_offset;
}

static void bvec_next_page(struct dpages *dp)
{
	struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr;
	dp->context_ptr = bvec + 1;
}

static void bvec_dp_init(struct dpages *dp, struct bio_vec *bvec)
{
	dp->get_page = bvec_get_page;
	dp->next_page = bvec_next_page;
	dp->context_ptr = bvec;
}

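/*
 * Functions for getting the pages from a virtually contiguous
 * (vmalloc'ed) buffer.  context_ptr is the current virtual address and
 * context_u the offset within its page, so the first page may be
 * partial and every later one starts at offset 0.
 */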
static void vm_get_page(struct dpages *dp,
		 struct page **p, unsigned long *len, unsigned *offset)
{
	*p = vmalloc_to_page(dp->context_ptr);
	*offset = dp->context_u;
	*len = PAGE_SIZE - dp->context_u;
}

static void vm_next_page(struct dpages *dp)
{
	dp->context_ptr += PAGE_SIZE - dp->context_u;
	dp->context_u = 0;
}

static void vm_dp_init(struct dpages *dp, void *data)
{
	dp->get_page = vm_get_page;
	dp->next_page = vm_next_page;
	dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1);
	dp->context_ptr = data;
}

static void dm_bio_destructor(struct bio *bio)
{
	struct io *io = bio->bi_private;

	bio_free(bio, bios(io->client));
}

/*-----------------------------------------------------------------
 * IO routines that accept a list of pages.
 *---------------------------------------------------------------*/
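/*
 * do_region() may generate more than one bio per region: when
 * bio_add_page() refuses a page (the bio is full or a device limit is
 * hit), the partial bio is submitted and the outer loop starts a fresh
 * one for the remaining sectors.  Every submitted bio takes a
 * reference on io->count, which dec_count() drops on completion.
 */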
static void do_region(int rw, unsigned int region, struct io_region *where,
		      struct dpages *dp, struct io *io)
{
	struct bio *bio;
	struct page *page;
	unsigned long len;
	unsigned offset;
	unsigned num_bvecs;
	sector_t remaining = where->count;

	while (remaining) {
		/*
		 * Allocate a suitably sized bio: we add an extra
		 * bvec for bio_get/set_region() and decrement bi_max_vecs
		 * to hide it from bio_add_page().
		 */
		num_bvecs = (remaining / (PAGE_SIZE >> SECTOR_SHIFT)) + 2;
		bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, bios(io->client));
		bio->bi_sector = where->sector + (where->count - remaining);
		bio->bi_bdev = where->bdev;
		bio->bi_end_io = endio;
		bio->bi_private = io;
		bio->bi_destructor = dm_bio_destructor;
		bio->bi_max_vecs--;
		bio_set_region(bio, region);

		/*
		 * Try and add as many pages as possible.
		 */
		while (remaining) {
			dp->get_page(dp, &page, &len, &offset);
			len = min(len, to_bytes(remaining));
			if (!bio_add_page(bio, page, len, offset))
				break;

			offset = 0;
			remaining -= to_sector(len);
			dp->next_page(dp);
		}

		atomic_inc(&io->count);
		submit_bio(rw, bio);
	}
}

static void dispatch_io(int rw, unsigned int num_regions,
			struct io_region *where, struct dpages *dp,
			struct io *io, int sync)
{
	int i;
	struct dpages old_pages = *dp;

	if (sync)
		rw |= (1 << BIO_RW_SYNC);

	/*
	 * For multiple regions we need to be careful to rewind
	 * the dp object for each call to do_region.
	 */
	for (i = 0; i < num_regions; i++) {
		*dp = old_pages;
		if (where[i].count)
			do_region(rw, i, where + i, dp, io);
	}

	/*
	 * Drop the extra reference that we were holding to avoid
	 * the io being completed too early.
	 */
	dec_count(io, 0, 0);
}

static int sync_io(struct dm_io_client *client, unsigned int num_regions,
		   struct io_region *where, int rw, struct dpages *dp,
		   unsigned long *error_bits)
{
	struct io io;

	if (num_regions > 1 && rw != WRITE) {
		WARN_ON(1);
		return -EIO;
	}

	io.error = 0;
	atomic_set(&io.count, 1); /* see dispatch_io() */
	io.sleeper = current;
	io.client = client;

	dispatch_io(rw, num_regions, where, dp, &io, 1);

	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		if (!atomic_read(&io.count) || signal_pending(current))
			break;

		io_schedule();
	}
	set_current_state(TASK_RUNNING);

	if (atomic_read(&io.count))
		return -EINTR;

	if (error_bits)
		*error_bits = io.error;

	return io.error ? -EIO : 0;
}

static int async_io(struct dm_io_client *client, unsigned int num_regions,
		    struct io_region *where, int rw, struct dpages *dp,
		    io_notify_fn fn, void *context)
{
	struct io *io;

	if (num_regions > 1 && rw != WRITE) {
		WARN_ON(1);
		fn(1, context);
		return -EIO;
	}

	io = mempool_alloc(io_pool(client), GFP_NOIO);
	io->error = 0;
	atomic_set(&io->count, 1); /* see dispatch_io() */
	io->sleeper = NULL;
	io->client = client;
	io->callback = fn;
	io->context = context;

	dispatch_io(rw, num_regions, where, dp, io, 0);
	return 0;
}

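/*
 * The exported dm_io_sync*() / dm_io_async*() helpers below form the
 * old, pre-client interface: they pass a NULL client, so the global
 * _io_pool and _bios reserved via dm_io_get() are used.
 *
 * Example (a sketch; 'bdev' and 'data' are assumed to be provided by a
 * hypothetical caller): synchronously read 8 sectors from one region
 * into a vmalloc'ed buffer:
 *
 *	struct io_region where = {
 *		.bdev   = bdev,
 *		.sector = 0,
 *		.count  = 8,
 *	};
 *	unsigned long error_bits;
 *	int r = dm_io_sync_vm(1, &where, READ, data, &error_bits);
 *
 * The async variants take an io_notify_fn instead of blocking; the
 * callback receives the error bitmask and the caller's context, and is
 * typically invoked from the bio completion path, so it should not
 * sleep.
 */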
int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw,
	       struct page_list *pl, unsigned int offset,
	       unsigned long *error_bits)
{
	struct dpages dp;
	list_dp_init(&dp, pl, offset);
	return sync_io(NULL, num_regions, where, rw, &dp, error_bits);
}

int dm_io_sync_bvec(unsigned int num_regions, struct io_region *where, int rw,
		    struct bio_vec *bvec, unsigned long *error_bits)
{
	struct dpages dp;
	bvec_dp_init(&dp, bvec);
	return sync_io(NULL, num_regions, where, rw, &dp, error_bits);
}

int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw,
		  void *data, unsigned long *error_bits)
{
	struct dpages dp;
	vm_dp_init(&dp, data);
	return sync_io(NULL, num_regions, where, rw, &dp, error_bits);
}

int dm_io_async(unsigned int num_regions, struct io_region *where, int rw,
		struct page_list *pl, unsigned int offset,
		io_notify_fn fn, void *context)
{
	struct dpages dp;
	list_dp_init(&dp, pl, offset);
	return async_io(NULL, num_regions, where, rw, &dp, fn, context);
}

int dm_io_async_bvec(unsigned int num_regions, struct io_region *where, int rw,
		     struct bio_vec *bvec, io_notify_fn fn, void *context)
{
	struct dpages dp;
	bvec_dp_init(&dp, bvec);
	return async_io(NULL, num_regions, where, rw, &dp, fn, context);
}

int dm_io_async_vm(unsigned int num_regions, struct io_region *where, int rw,
		   void *data, io_notify_fn fn, void *context)
{
	struct dpages dp;
	vm_dp_init(&dp, data);
	return async_io(NULL, num_regions, where, rw, &dp, fn, context);
}

EXPORT_SYMBOL(dm_io_get);
EXPORT_SYMBOL(dm_io_put);
EXPORT_SYMBOL(dm_io_sync);
EXPORT_SYMBOL(dm_io_async);
EXPORT_SYMBOL(dm_io_sync_bvec);
EXPORT_SYMBOL(dm_io_async_bvec);
EXPORT_SYMBOL(dm_io_sync_vm);
EXPORT_SYMBOL(dm_io_async_vm);