/*
 * Functions related to barrier IO handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>

#include "blk.h"

/**
 * blk_queue_ordered - set up ordered write handling for a queue
 * @q: the request queue
 * @ordered: one of QUEUE_ORDERED_*
 * @prepare_flush_fn: rq setup helper for cache flush ordered writes
 *
 * Description:
 *   For journalled file systems, doing ordered writes on a commit
 *   block instead of explicitly doing wait_on_buffer (which is bad
 *   for performance) can be a big win. Block drivers supporting this
 *   feature should call this function to indicate which ordered mode
 *   they support.
 *
 **/
int blk_queue_ordered(struct request_queue *q, unsigned ordered,
                      prepare_flush_fn *prepare_flush_fn)
{
        if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH |
                                             QUEUE_ORDERED_DO_POSTFLUSH))) {
                printk(KERN_ERR "%s: prepare_flush_fn required\n", __func__);
                return -EINVAL;
        }

        if (ordered != QUEUE_ORDERED_NONE &&
            ordered != QUEUE_ORDERED_DRAIN &&
            ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
            ordered != QUEUE_ORDERED_DRAIN_FUA &&
            ordered != QUEUE_ORDERED_TAG &&
            ordered != QUEUE_ORDERED_TAG_FLUSH &&
            ordered != QUEUE_ORDERED_TAG_FUA) {
                printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
                return -EINVAL;
        }

        q->ordered = ordered;
        q->next_ordered = ordered;
        q->prepare_flush_fn = prepare_flush_fn;

        return 0;
}
EXPORT_SYMBOL(blk_queue_ordered);
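
/*
 * Illustrative sketch only (not compiled): roughly how a driver with a
 * write-back cache might register ordered write support.  The "foo"
 * names, the flush command byte and the timeout are made up for this
 * example; a real driver fills in its own cache flush command in its
 * prepare_flush_fn.
 */
#if 0
static void foo_prepare_flush(struct request_queue *q, struct request *rq)
{
        rq->cmd_type = REQ_TYPE_BLOCK_PC;
        rq->timeout = FOO_FLUSH_TIMEOUT;
        rq->cmd[0] = FOO_FLUSH_CACHE_CMD;
        rq->cmd_len = 1;
}

static void foo_setup_queue(struct request_queue *q)
{
        /* drain the queue and issue cache flushes around each barrier */
        if (blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
                              foo_prepare_flush))
                printk(KERN_WARNING "foo: failed to set up ordered mode\n");
}
#endif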

/*
 * Cache flushing for ordered writes handling
 */
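
/*
 * The progress of an ordered sequence is tracked in q->ordseq as a
 * bitmask of completed stages.  Assuming the QUEUE_ORDSEQ_* bits in
 * blkdev.h are defined in pipeline order (STARTED, DRAIN, PREFLUSH,
 * BAR, POSTFLUSH, DONE), the current stage is the lowest bit that is
 * not yet set, hence the 1 << ffz(q->ordseq) below.  For example, once
 * STARTED and DRAIN are both set, ffz() points at the PREFLUSH bit, so
 * the pre-flush is the stage currently in progress.
 */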
unsigned blk_ordered_cur_seq(struct request_queue *q)
{
        if (!q->ordseq)
                return 0;
        return 1 << ffz(q->ordseq);
}

unsigned blk_ordered_req_seq(struct request *rq)
{
        struct request_queue *q = rq->q;

        BUG_ON(q->ordseq == 0);

        if (rq == &q->pre_flush_rq)
                return QUEUE_ORDSEQ_PREFLUSH;
        if (rq == &q->bar_rq)
                return QUEUE_ORDSEQ_BAR;
        if (rq == &q->post_flush_rq)
                return QUEUE_ORDSEQ_POSTFLUSH;

        /*
         * !fs requests don't need to follow barrier ordering.  Always
         * put them at the front.  This fixes the following deadlock.
         *
         * http://thread.gmane.org/gmane.linux.kernel/537473
         */
        if (!blk_fs_request(rq))
                return QUEUE_ORDSEQ_DRAIN;

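        /*
         * Normal fs requests are sequenced by ordered color: requests
         * that were queued before the barrier share the barrier's color
         * and still belong to the drain stage, while requests queued
         * after it carry the flipped color (q->ordcolor is toggled for
         * each barrier that is queued) and therefore sort behind the
         * whole sequence.
         */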
        if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
            (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
                return QUEUE_ORDSEQ_DRAIN;
        else
                return QUEUE_ORDSEQ_DONE;
}

bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
{
        struct request *rq;

        if (error && !q->orderr)
                q->orderr = error;

        BUG_ON(q->ordseq & seq);
        q->ordseq |= seq;

        if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
                return false;

        /*
         * Okay, sequence complete.
         */
        q->ordseq = 0;
        rq = q->orig_bar_rq;

        if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq)))
                BUG();

        return true;
}

static void pre_flush_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
}

static void bar_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
}

static void post_flush_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
}

static void queue_flush(struct request_queue *q, unsigned which)
{
        struct request *rq;
        rq_end_io_fn *end_io;

        if (which == QUEUE_ORDERED_DO_PREFLUSH) {
                rq = &q->pre_flush_rq;
                end_io = pre_flush_end_io;
        } else {
                rq = &q->post_flush_rq;
                end_io = post_flush_end_io;
        }

        blk_rq_init(q, rq);
        rq->cmd_flags = REQ_HARDBARRIER;
        rq->rq_disk = q->bar_rq.rq_disk;
        rq->end_io = end_io;
        q->prepare_flush_fn(q, rq);

        elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
}

static inline bool start_ordered(struct request_queue *q, struct request **rqp)
{
        struct request *rq = *rqp;
        unsigned skip = 0;

        q->orderr = 0;
        q->ordered = q->next_ordered;
        q->ordseq |= QUEUE_ORDSEQ_STARTED;

        /*
         * For an empty barrier, there's no actual BAR request, which
         * in turn makes POSTFLUSH unnecessary.  Mask them off.
         */
        if (!rq->hard_nr_sectors)
                q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
                                QUEUE_ORDERED_DO_POSTFLUSH);

        /* stash away the original request */
        elv_dequeue_request(q, rq);
        q->orig_bar_rq = rq;
        rq = NULL;

        /*
         * Queue the ordered sequence.  As we stack the requests at the
         * head, we need to queue them in reverse order.  Note that we
         * rely on the fact that no fs request uses ELEVATOR_INSERT_FRONT
         * and thus no fs request gets in between the ordered sequence.
         */
        if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
                queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
                rq = &q->post_flush_rq;
        } else
                skip |= QUEUE_ORDSEQ_POSTFLUSH;

        if (q->ordered & QUEUE_ORDERED_DO_BAR) {
                rq = &q->bar_rq;

                /* initialize proxy request and queue it */
                blk_rq_init(q, rq);
                if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
                        rq->cmd_flags |= REQ_RW;
                if (q->ordered & QUEUE_ORDERED_DO_FUA)
                        rq->cmd_flags |= REQ_FUA;
                init_request_from_bio(rq, q->orig_bar_rq->bio);
                rq->end_io = bar_end_io;

                elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
        } else
                skip |= QUEUE_ORDSEQ_BAR;

        if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
                queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
                rq = &q->pre_flush_rq;
        } else
                skip |= QUEUE_ORDSEQ_PREFLUSH;

        if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && q->in_flight)
                rq = NULL;
        else
                skip |= QUEUE_ORDSEQ_DRAIN;

        *rqp = rq;

        /*
         * Complete the skipped sequences.  If the whole sequence is
         * complete, return false to tell the elevator that this request
         * is gone.
         */
        return !blk_ordered_complete_seq(q, skip, 0);
}
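
/*
 * Worked example (illustrative): with QUEUE_ORDERED_DRAIN_FLUSH and a
 * non-empty barrier, the reverse-order front insertions above leave the
 * head of the queue looking like
 *
 *      pre_flush_rq -> bar_rq -> post_flush_rq -> rest of the queue
 *
 * and the sequence then advances DRAIN (in-flight requests finish),
 * PREFLUSH, BAR, POSTFLUSH as each step completes.
 */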

bool blk_do_ordered(struct request_queue *q, struct request **rqp)
{
        struct request *rq = *rqp;
        const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);

        if (!q->ordseq) {
                if (!is_barrier)
                        return true;

                if (q->next_ordered != QUEUE_ORDERED_NONE)
                        return start_ordered(q, rqp);
                else {
                        /*
                         * Queue ordering not supported.  Terminate
                         * with prejudice.
                         */
                        elv_dequeue_request(q, rq);
                        if (__blk_end_request(rq, -EOPNOTSUPP,
                                              blk_rq_bytes(rq)))
                                BUG();
                        *rqp = NULL;
                        return false;
                }
        }

        /*
         * Ordered sequence in progress
         */

        /* Special requests are not subject to ordering rules. */
        if (!blk_fs_request(rq) &&
            rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
                return true;

        if (q->ordered & QUEUE_ORDERED_BY_TAG) {
                /* Ordered by tag.  Blocking the next barrier is enough. */
                if (is_barrier && rq != &q->bar_rq)
                        *rqp = NULL;
        } else {
                /* Ordered by draining.  Wait for turn. */
                WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
                if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
                        *rqp = NULL;
        }

        return true;
}
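
/*
 * Illustrative sketch only (not compiled): blk_do_ordered() is meant to
 * be consulted by the dispatch path before the request at the head of
 * the queue is handed to the driver, roughly along these lines.  This
 * is a simplified version of what the block layer's
 * __elv_next_request() helper in blk.h does; the real code differs in
 * detail.
 */
#if 0
static struct request *example_next_request(struct request_queue *q)
{
        struct request *rq;

        while (!list_empty(&q->queue_head)) {
                rq = list_entry_rq(q->queue_head.next);
                if (blk_do_ordered(q, &rq))
                        return rq;      /* may be NULL: hold off dispatch */
                /* request was terminated; look at the new head */
        }
        return NULL;
}
#endif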

static void bio_end_empty_barrier(struct bio *bio, int err)
{
        if (err) {
                if (err == -EOPNOTSUPP)
                        set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
                clear_bit(BIO_UPTODATE, &bio->bi_flags);
        }

        complete(bio->bi_private);
}

/**
 * blkdev_issue_flush - queue a flush
 * @bdev: blockdev to issue flush for
 * @error_sector: error sector
 *
 * Description:
 *    Issue a flush for the block device in question and wait for it to
 *    complete.  Callers can supply room for storing the error offset in
 *    case of a flush error, if they wish to.
 */
int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
{
        DECLARE_COMPLETION_ONSTACK(wait);
        struct request_queue *q;
        struct bio *bio;
        int ret;

        if (bdev->bd_disk == NULL)
                return -ENXIO;

        q = bdev_get_queue(bdev);
        if (!q)
                return -ENXIO;

        bio = bio_alloc(GFP_KERNEL, 0);
        if (!bio)
                return -ENOMEM;

        bio->bi_end_io = bio_end_empty_barrier;
        bio->bi_private = &wait;
        bio->bi_bdev = bdev;
        submit_bio(WRITE_BARRIER, bio);

        wait_for_completion(&wait);

        /*
         * The driver must store the error location in ->bi_sector, if
         * it supports it.  For non-stacked drivers, this should be
         * copied from rq->sector.
         */
        if (error_sector)
                *error_sector = bio->bi_sector;

        ret = 0;
        if (bio_flagged(bio, BIO_EOPNOTSUPP))
                ret = -EOPNOTSUPP;
        else if (!bio_flagged(bio, BIO_UPTODATE))
                ret = -EIO;

        bio_put(bio);
        return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);
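
/*
 * Illustrative sketch only (not compiled): a hypothetical filesystem
 * fsync path flushing the device's volatile write cache once its data
 * and metadata writes have completed.  foo_sync_device() is a made-up
 * name; callers that don't care about the failing sector pass NULL.
 */
#if 0
static int foo_sync_device(struct super_block *sb)
{
        int err = blkdev_issue_flush(sb->s_bdev, NULL);

        /* the device may not support cache flushes at all */
        if (err == -EOPNOTSUPP)
                err = 0;
        return err;
}
#endif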

static void blkdev_discard_end_io(struct bio *bio, int err)
{
        if (err) {
                if (err == -EOPNOTSUPP)
                        set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
                clear_bit(BIO_UPTODATE, &bio->bi_flags);
        }

        bio_put(bio);
}

/**
 * blkdev_issue_discard - queue a discard
 * @bdev: blockdev to issue discard for
 * @sector: start sector
 * @nr_sects: number of sectors to discard
 * @gfp_mask: memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Issue a discard request for the sectors in question.  Does not wait.
 */
int blkdev_issue_discard(struct block_device *bdev,
                         sector_t sector, sector_t nr_sects, gfp_t gfp_mask)
{
        struct request_queue *q;
        struct bio *bio;
        int ret = 0;

        if (bdev->bd_disk == NULL)
                return -ENXIO;

        q = bdev_get_queue(bdev);
        if (!q)
                return -ENXIO;

        if (!q->prepare_discard_fn)
                return -EOPNOTSUPP;

        while (nr_sects && !ret) {
                bio = bio_alloc(gfp_mask, 0);
                if (!bio)
                        return -ENOMEM;

                bio->bi_end_io = blkdev_discard_end_io;
                bio->bi_bdev = bdev;

                bio->bi_sector = sector;

                if (nr_sects > q->max_hw_sectors) {
                        bio->bi_size = q->max_hw_sectors << 9;
                        nr_sects -= q->max_hw_sectors;
                        sector += q->max_hw_sectors;
                } else {
                        bio->bi_size = nr_sects << 9;
                        nr_sects = 0;
                }
                bio_get(bio);
                submit_bio(DISCARD_BARRIER, bio);

                /* Check if it failed immediately */
                if (bio_flagged(bio, BIO_EOPNOTSUPP))
                        ret = -EOPNOTSUPP;
                else if (!bio_flagged(bio, BIO_UPTODATE))
                        ret = -EIO;
                bio_put(bio);
        }
        return ret;
}
EXPORT_SYMBOL(blkdev_issue_discard);
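
/*
 * Illustrative sketch only (not compiled): a hypothetical filesystem
 * telling the device that a freed extent no longer contains useful
 * data.  foo_free_extent() and its arguments are made-up names;
 * GFP_NOFS is the usual allocation flag from filesystem context.
 */
#if 0
static void foo_free_extent(struct block_device *bdev, sector_t start,
                            sector_t len)
{
        /* best effort: many devices simply don't support discard */
        if (blkdev_issue_discard(bdev, start, len, GFP_NOFS) == -EOPNOTSUPP)
                pr_debug("foo: discard not supported, skipping\n");
}
#endif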