Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 1 | /* |
Tejun Heo | 4fed947 | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 2 | * Functions to sequence FLUSH and FUA writes. |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 3 | */ |
| 4 | #include <linux/kernel.h> |
| 5 | #include <linux/module.h> |
| 6 | #include <linux/bio.h> |
| 7 | #include <linux/blkdev.h> |
Tejun Heo | 5a0e3ad | 2010-03-24 17:04:11 +0900 | [diff] [blame] | 8 | #include <linux/gfp.h> |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 9 | |
| 10 | #include "blk.h" |
| 11 | |
/*
 * Steps of a FLUSH/FUA sequence.  Each finished (or skipped) step has its
 * bit set in q->flush_seq; the lowest bit that is still clear identifies
 * the step currently being worked on.
 */
enum {
	/* a flush sequence is in progress */
	QUEUE_FSEQ_STARTED	= 1 << 0,
	/* pre-flushing in progress */
	QUEUE_FSEQ_PREFLUSH	= 1 << 1,
	/* data write in progress */
	QUEUE_FSEQ_DATA		= 1 << 2,
	/* post-flushing in progress */
	QUEUE_FSEQ_POSTFLUSH	= 1 << 3,
	/* all earlier steps are finished */
	QUEUE_FSEQ_DONE		= 1 << 4,
};
| 20 | |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 21 | static struct request *queue_next_fseq(struct request_queue *q); |
Tejun Heo | 28e7d18 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 22 | |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 23 | unsigned blk_flush_cur_seq(struct request_queue *q) |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 24 | { |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 25 | if (!q->flush_seq) |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 26 | return 0; |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 27 | return 1 << ffz(q->flush_seq); |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 28 | } |
| 29 | |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 30 | static struct request *blk_flush_complete_seq(struct request_queue *q, |
| 31 | unsigned seq, int error) |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 32 | { |
Tejun Heo | 28e7d18 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 33 | struct request *next_rq = NULL; |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 34 | |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 35 | if (error && !q->flush_err) |
| 36 | q->flush_err = error; |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 37 | |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 38 | BUG_ON(q->flush_seq & seq); |
| 39 | q->flush_seq |= seq; |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 40 | |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 41 | if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) { |
| 42 | /* not complete yet, queue the next flush sequence */ |
| 43 | next_rq = queue_next_fseq(q); |
Tejun Heo | 28e7d18 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 44 | } else { |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 45 | /* complete this flush request */ |
| 46 | __blk_end_request_all(q->orig_flush_rq, q->flush_err); |
| 47 | q->orig_flush_rq = NULL; |
| 48 | q->flush_seq = 0; |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 49 | |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 50 | /* dispatch the next flush if there's one */ |
| 51 | if (!list_empty(&q->pending_flushes)) { |
| 52 | next_rq = list_entry_rq(q->pending_flushes.next); |
Tejun Heo | 28e7d18 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 53 | list_move(&next_rq->queuelist, &q->queue_head); |
| 54 | } |
| 55 | } |
| 56 | return next_rq; |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 57 | } |
| 58 | |
Tejun Heo | 47f70d5 | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 59 | static void blk_flush_complete_seq_end_io(struct request_queue *q, |
| 60 | unsigned seq, int error) |
| 61 | { |
| 62 | bool was_empty = elv_queue_empty(q); |
| 63 | struct request *next_rq; |
| 64 | |
| 65 | next_rq = blk_flush_complete_seq(q, seq, error); |
| 66 | |
| 67 | /* |
| 68 | * Moving a request silently to empty queue_head may stall the |
| 69 | * queue. Kick the queue in those cases. |
| 70 | */ |
| 71 | if (was_empty && next_rq) |
| 72 | __blk_run_queue(q); |
| 73 | } |
| 74 | |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 75 | static void pre_flush_end_io(struct request *rq, int error) |
| 76 | { |
| 77 | elv_completed_request(rq->q, rq); |
Tejun Heo | 47f70d5 | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 78 | blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_PREFLUSH, error); |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 79 | } |
| 80 | |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 81 | static void flush_data_end_io(struct request *rq, int error) |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 82 | { |
| 83 | elv_completed_request(rq->q, rq); |
Tejun Heo | 47f70d5 | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 84 | blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_DATA, error); |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 85 | } |
| 86 | |
| 87 | static void post_flush_end_io(struct request *rq, int error) |
| 88 | { |
| 89 | elv_completed_request(rq->q, rq); |
Tejun Heo | 47f70d5 | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 90 | blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_POSTFLUSH, error); |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 91 | } |
| 92 | |
Christoph Hellwig | cde4c40 | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 93 | static void init_flush_request(struct request *rq, struct gendisk *disk) |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 94 | { |
FUJITA Tomonori | 28e18d0 | 2010-07-09 09:38:24 +0900 | [diff] [blame] | 95 | rq->cmd_type = REQ_TYPE_FS; |
Tejun Heo | 337238b | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 96 | rq->cmd_flags = WRITE_FLUSH; |
Christoph Hellwig | cde4c40 | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 97 | rq->rq_disk = disk; |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 98 | } |
| 99 | |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 100 | static struct request *queue_next_fseq(struct request_queue *q) |
Tejun Heo | 28e7d18 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 101 | { |
Tejun Heo | 4fed947 | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 102 | struct request *orig_rq = q->orig_flush_rq; |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 103 | struct request *rq = &q->flush_rq; |
Tejun Heo | 28e7d18 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 104 | |
Christoph Hellwig | cde4c40 | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 105 | blk_rq_init(q, rq); |
| 106 | |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 107 | switch (blk_flush_cur_seq(q)) { |
| 108 | case QUEUE_FSEQ_PREFLUSH: |
Christoph Hellwig | cde4c40 | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 109 | init_flush_request(rq, orig_rq->rq_disk); |
| 110 | rq->end_io = pre_flush_end_io; |
Tejun Heo | 28e7d18 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 111 | break; |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 112 | case QUEUE_FSEQ_DATA: |
Tejun Heo | 4fed947 | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 113 | init_request_from_bio(rq, orig_rq->bio); |
Tejun Heo | 09d60c7 | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 114 | /* |
| 115 | * orig_rq->rq_disk may be different from |
| 116 | * bio->bi_bdev->bd_disk if orig_rq got here through |
| 117 | * remapping drivers. Make sure rq->rq_disk points |
| 118 | * to the same one as orig_rq. |
| 119 | */ |
| 120 | rq->rq_disk = orig_rq->rq_disk; |
Tejun Heo | 4fed947 | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 121 | rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA); |
| 122 | rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA); |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 123 | rq->end_io = flush_data_end_io; |
Tejun Heo | 28e7d18 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 124 | break; |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 125 | case QUEUE_FSEQ_POSTFLUSH: |
Christoph Hellwig | cde4c40 | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 126 | init_flush_request(rq, orig_rq->rq_disk); |
| 127 | rq->end_io = post_flush_end_io; |
Tejun Heo | 28e7d18 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 128 | break; |
Tejun Heo | 28e7d18 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 129 | default: |
| 130 | BUG(); |
| 131 | } |
Christoph Hellwig | cde4c40 | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 132 | |
| 133 | elv_insert(q, rq, ELEVATOR_INSERT_FRONT); |
Tejun Heo | 28e7d18 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 134 | return rq; |
| 135 | } |
| 136 | |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 137 | struct request *blk_do_flush(struct request_queue *q, struct request *rq) |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 138 | { |
Tejun Heo | 4fed947 | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 139 | unsigned int fflags = q->flush_flags; /* may change, cache it */ |
| 140 | bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA; |
| 141 | bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH); |
| 142 | bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA); |
Tejun Heo | 8f11b3e | 2008-11-28 13:32:05 +0900 | [diff] [blame] | 143 | unsigned skip = 0; |
| 144 | |
Tejun Heo | 4fed947 | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 145 | /* |
| 146 | * Special case. If there's data but flush is not necessary, |
| 147 | * the request can be issued directly. |
| 148 | * |
| 149 | * Flush w/o data should be able to be issued directly too but |
| 150 | * currently some drivers assume that rq->bio contains |
| 151 | * non-zero data if it isn't NULL and empty FLUSH requests |
| 152 | * getting here usually have bio's without data. |
| 153 | */ |
| 154 | if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) { |
| 155 | rq->cmd_flags &= ~REQ_FLUSH; |
| 156 | if (!has_fua) |
| 157 | rq->cmd_flags &= ~REQ_FUA; |
Tejun Heo | 28e7d18 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 158 | return rq; |
Tejun Heo | 28e7d18 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 159 | } |
| 160 | |
Tejun Heo | 4fed947 | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 161 | /* |
| 162 | * Sequenced flushes can't be processed in parallel. If |
| 163 | * another one is already in progress, queue for later |
| 164 | * processing. |
| 165 | */ |
| 166 | if (q->flush_seq) { |
| 167 | list_move_tail(&rq->queuelist, &q->pending_flushes); |
Tejun Heo | 28e7d18 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 168 | return NULL; |
| 169 | } |
| 170 | |
| 171 | /* |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 172 | * Start a new flush sequence |
Tejun Heo | 28e7d18 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 173 | */ |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 174 | q->flush_err = 0; |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 175 | q->flush_seq |= QUEUE_FSEQ_STARTED; |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 176 | |
Tejun Heo | 4fed947 | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 177 | /* adjust FLUSH/FUA of the original request and stash it away */ |
| 178 | rq->cmd_flags &= ~REQ_FLUSH; |
| 179 | if (!has_fua) |
| 180 | rq->cmd_flags &= ~REQ_FUA; |
Tejun Heo | 9934c8c | 2009-05-08 11:54:16 +0900 | [diff] [blame] | 181 | blk_dequeue_request(rq); |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 182 | q->orig_flush_rq = rq; |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 183 | |
Tejun Heo | 4fed947 | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 184 | /* skip unneded sequences and return the first one */ |
| 185 | if (!do_preflush) |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 186 | skip |= QUEUE_FSEQ_PREFLUSH; |
Tejun Heo | 4fed947 | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 187 | if (!blk_rq_sectors(rq)) |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 188 | skip |= QUEUE_FSEQ_DATA; |
Tejun Heo | 4fed947 | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 189 | if (!do_postflush) |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 190 | skip |= QUEUE_FSEQ_POSTFLUSH; |
Tejun Heo | dd4c133 | 2010-09-03 11:56:16 +0200 | [diff] [blame] | 191 | return blk_flush_complete_seq(q, skip, 0); |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 192 | } |
| 193 | |
Tejun Heo | d391a2d | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 194 | static void bio_end_flush(struct bio *bio, int err) |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 195 | { |
Tejun Heo | d391a2d | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 196 | if (err) |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 197 | clear_bit(BIO_UPTODATE, &bio->bi_flags); |
Dmitry Monakhov | f17e232 | 2010-04-28 17:55:07 +0400 | [diff] [blame] | 198 | if (bio->bi_private) |
| 199 | complete(bio->bi_private); |
| 200 | bio_put(bio); |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 201 | } |
| 202 | |
| 203 | /** |
| 204 | * blkdev_issue_flush - queue a flush |
| 205 | * @bdev: blockdev to issue flush for |
Dmitry Monakhov | fbd9b09 | 2010-04-28 17:55:06 +0400 | [diff] [blame] | 206 | * @gfp_mask: memory allocation flags (for bio_alloc) |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 207 | * @error_sector: error sector |
| 208 | * |
| 209 | * Description: |
| 210 | * Issue a flush for the block device in question. Caller can supply |
| 211 | * room for storing the error offset in case of a flush error, if they |
Dmitry Monakhov | f17e232 | 2010-04-28 17:55:07 +0400 | [diff] [blame] | 212 | * wish to. If WAIT flag is not passed then caller may check only what |
| 213 | * request was pushed in some internal queue for later handling. |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 214 | */ |
Dmitry Monakhov | fbd9b09 | 2010-04-28 17:55:06 +0400 | [diff] [blame] | 215 | int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, |
Christoph Hellwig | dd3932e | 2010-09-16 20:51:46 +0200 | [diff] [blame] | 216 | sector_t *error_sector) |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 217 | { |
| 218 | DECLARE_COMPLETION_ONSTACK(wait); |
| 219 | struct request_queue *q; |
| 220 | struct bio *bio; |
Dmitry Monakhov | fbd9b09 | 2010-04-28 17:55:06 +0400 | [diff] [blame] | 221 | int ret = 0; |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 222 | |
| 223 | if (bdev->bd_disk == NULL) |
| 224 | return -ENXIO; |
| 225 | |
| 226 | q = bdev_get_queue(bdev); |
| 227 | if (!q) |
| 228 | return -ENXIO; |
| 229 | |
Dave Chinner | f10d9f6 | 2010-07-13 17:50:50 +1000 | [diff] [blame] | 230 | /* |
| 231 | * some block devices may not have their queue correctly set up here |
| 232 | * (e.g. loop device without a backing file) and so issuing a flush |
| 233 | * here will panic. Ensure there is a request function before issuing |
Tejun Heo | d391a2d | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 234 | * the flush. |
Dave Chinner | f10d9f6 | 2010-07-13 17:50:50 +1000 | [diff] [blame] | 235 | */ |
| 236 | if (!q->make_request_fn) |
| 237 | return -ENXIO; |
| 238 | |
Dmitry Monakhov | fbd9b09 | 2010-04-28 17:55:06 +0400 | [diff] [blame] | 239 | bio = bio_alloc(gfp_mask, 0); |
Tejun Heo | d391a2d | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 240 | bio->bi_end_io = bio_end_flush; |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 241 | bio->bi_bdev = bdev; |
Christoph Hellwig | dd3932e | 2010-09-16 20:51:46 +0200 | [diff] [blame] | 242 | bio->bi_private = &wait; |
Dmitry Monakhov | f17e232 | 2010-04-28 17:55:07 +0400 | [diff] [blame] | 243 | |
| 244 | bio_get(bio); |
Tejun Heo | d391a2d | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 245 | submit_bio(WRITE_FLUSH, bio); |
Christoph Hellwig | dd3932e | 2010-09-16 20:51:46 +0200 | [diff] [blame] | 246 | wait_for_completion(&wait); |
| 247 | |
| 248 | /* |
| 249 | * The driver must store the error location in ->bi_sector, if |
| 250 | * it supports it. For non-stacked drivers, this should be |
| 251 | * copied from blk_rq_pos(rq). |
| 252 | */ |
| 253 | if (error_sector) |
| 254 | *error_sector = bio->bi_sector; |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 255 | |
Tejun Heo | d391a2d | 2010-09-03 11:56:17 +0200 | [diff] [blame] | 256 | if (!bio_flagged(bio, BIO_UPTODATE)) |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 257 | ret = -EIO; |
| 258 | |
| 259 | bio_put(bio); |
| 260 | return ret; |
| 261 | } |
Jens Axboe | 86db1e2 | 2008-01-29 14:53:40 +0100 | [diff] [blame] | 262 | EXPORT_SYMBOL(blkdev_issue_flush); |