Allow elevators to sort/merge discard requests

But blkdev_issue_discard() still emits requests which are interpreted as
soft barriers, because naïve callers might otherwise issue subsequent
writes to those same sectors, and those writes could pass the discard on
the queue (if the sectors are reallocated quickly enough).

Callers still _can_ issue non-barrier discard requests (via
DISCARD_NOBARRIER), but they have to take care of queue ordering for
themselves; one way is sketched below.
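
For illustration, a minimal synchronous caller might look roughly like
this (modelled on the block/ioctl.c hunk in this patch; the helper name
issue_unordered_discard() is hypothetical, and a real caller would also
have to split the range against q->max_hw_sectors):

    static void discard_endio(struct bio *bio, int err)
    {
    	if (err == -EOPNOTSUPP)
    		set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
    	complete(bio->bi_private);
    }

    /* Hypothetical helper: nr_sects must already fit the queue limits. */
    static int issue_unordered_discard(struct block_device *bdev,
    				   sector_t sector, unsigned int nr_sects)
    {
    	DECLARE_COMPLETION_ONSTACK(wait);
    	struct bio *bio = bio_alloc(GFP_KERNEL, 0);

    	if (!bio)
    		return -ENOMEM;

    	bio->bi_sector = sector;
    	bio->bi_size = nr_sects << 9;
    	bio->bi_bdev = bdev;
    	bio->bi_end_io = discard_endio;
    	bio->bi_private = &wait;

    	/* No BIO_RW_BARRIER bit, so the elevator is free to sort and
    	 * merge this discard; waiting for completion here is what keeps
    	 * later writes to the same sectors ordered behind it. */
    	submit_bio(DISCARD_NOBARRIER, bio);
    	wait_for_completion(&wait);

    	if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
    		bio_put(bio);
    		return -EOPNOTSUPP;
    	}
    	bio_put(bio);
    	return 0;
    }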

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index e544813..988b634 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -372,7 +372,7 @@
 			nr_sects = 0;
 		}
 		bio_get(bio);
-		submit_bio(WRITE_DISCARD, bio);
+		submit_bio(DISCARD_BARRIER, bio);
 
 		/* Check if it failed immediately */
 		if (bio_flagged(bio, BIO_EOPNOTSUPP))
diff --git a/block/blk-core.c b/block/blk-core.c
index 1e143c4..1261516 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1077,12 +1077,13 @@
 	/*
 	 * REQ_BARRIER implies no merging, but lets make it explicit
 	 */
-	if (unlikely(bio_barrier(bio)))
-		req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
 	if (unlikely(bio_discard(bio))) {
-		req->cmd_flags |= (REQ_SOFTBARRIER | REQ_DISCARD);
+		req->cmd_flags |= REQ_DISCARD;
+		if (bio_barrier(bio))
+			req->cmd_flags |= REQ_SOFTBARRIER;
 		req->q->prepare_discard_fn(req->q, req);
-	}
+	} else if (unlikely(bio_barrier(bio)))
+		req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
 
 	if (bio_sync(bio))
 		req->cmd_flags |= REQ_RW_SYNC;
@@ -1114,7 +1115,8 @@
 	blk_queue_bounce(q, &bio);
 
 	barrier = bio_barrier(bio);
-	if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
+	if (unlikely(barrier) && bio_has_data(bio) &&
+	    (q->next_ordered == QUEUE_ORDERED_NONE)) {
 		err = -EOPNOTSUPP;
 		goto end_io;
 	}
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 5efc9e7..6cf8f0c 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -11,7 +11,7 @@
 
 void blk_recalc_rq_sectors(struct request *rq, int nsect)
 {
-	if (blk_fs_request(rq)) {
+	if (blk_fs_request(rq) || blk_discard_rq(rq)) {
 		rq->hard_sector += nsect;
 		rq->hard_nr_sectors -= nsect;
 
@@ -131,13 +131,17 @@
 	if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
 		return 0;
 
-	if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
-		return 0;
 	if (bio->bi_size + nxt->bi_size > q->max_segment_size)
 		return 0;
 
+	if (!bio_has_data(bio))
+		return 1;
+
+	if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
+		return 0;
+
 	/*
-	 * bio and nxt are contigous in memory, check if the queue allows
+	 * bio and nxt are contiguous in memory; check if the queue allows
 	 * these two to be merged into one
 	 */
 	if (BIO_SEG_BOUNDARY(q, bio, nxt))
@@ -153,8 +157,9 @@
 		blk_recount_segments(q, bio);
 	if (!bio_flagged(nxt, BIO_SEG_VALID))
 		blk_recount_segments(q, nxt);
-	if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
-	    BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size))
+	if (bio_has_data(bio) &&
+	    (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
+	     BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size)))
 		return 0;
 	if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size)
 		return 0;
@@ -317,8 +322,9 @@
 	if (!bio_flagged(bio, BIO_SEG_VALID))
 		blk_recount_segments(q, bio);
 	len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
-	if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio))
-	    && !BIOVEC_VIRT_OVERSIZE(len)) {
+	if (!bio_has_data(bio) ||
+	    (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio))
+	     && !BIOVEC_VIRT_OVERSIZE(len))) {
 		int mergeable =  ll_new_mergeable(q, req, bio);
 
 		if (mergeable) {
@@ -356,8 +362,9 @@
 		blk_recount_segments(q, bio);
 	if (!bio_flagged(req->bio, BIO_SEG_VALID))
 		blk_recount_segments(q, req->bio);
-	if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
-	    !BIOVEC_VIRT_OVERSIZE(len)) {
+	if (!bio_has_data(bio) ||
+	    (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
+	     !BIOVEC_VIRT_OVERSIZE(len))) {
 		int mergeable =  ll_new_mergeable(q, req, bio);
 
 		if (mergeable) {
diff --git a/block/elevator.c b/block/elevator.c
index ed6f8f3..4f51270 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -75,6 +75,12 @@
 		return 0;
 
 	/*
+	 * Don't merge file system requests and discard requests
+	 */
+	if (bio_discard(bio) != bio_discard(rq->bio))
+		return 0;
+
+	/*
 	 * different data direction or already started, don't merge
 	 */
 	if (bio_data_dir(bio) != rq_data_dir(rq))
@@ -438,6 +444,8 @@
 	list_for_each_prev(entry, &q->queue_head) {
 		struct request *pos = list_entry_rq(entry);
 
+		if (blk_discard_rq(rq) != blk_discard_rq(pos))
+			break;
 		if (rq_data_dir(rq) != rq_data_dir(pos))
 			break;
 		if (pos->cmd_flags & stop_flags)
@@ -607,7 +615,7 @@
 		break;
 
 	case ELEVATOR_INSERT_SORT:
-		BUG_ON(!blk_fs_request(rq));
+		BUG_ON(!blk_fs_request(rq) && !blk_discard_rq(rq));
 		rq->cmd_flags |= REQ_SORTED;
 		q->nr_sorted++;
 		if (rq_mergeable(rq)) {
@@ -692,7 +700,7 @@
 		 * this request is scheduling boundary, update
 		 * end_sector
 		 */
-		if (blk_fs_request(rq)) {
+		if (blk_fs_request(rq) || blk_discard_rq(rq)) {
 			q->end_sector = rq_end_sector(rq);
 			q->boundary_rq = rq;
 		}
diff --git a/block/ioctl.c b/block/ioctl.c
index 342298b..375c579 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -161,7 +161,7 @@
 			bio->bi_size = len << 9;
 			len = 0;
 		}
-		submit_bio(WRITE_DISCARD, bio);
+		submit_bio(DISCARD_NOBARRIER, bio);
 
 		wait_for_completion(&wait);
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 1fdfc56..33c3947 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -188,8 +188,8 @@
 #define bio_failfast(bio)	((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
 #define bio_rw_ahead(bio)	((bio)->bi_rw & (1 << BIO_RW_AHEAD))
 #define bio_rw_meta(bio)	((bio)->bi_rw & (1 << BIO_RW_META))
-#define bio_empty_barrier(bio)	(bio_barrier(bio) && !bio_has_data(bio))
 #define bio_discard(bio)	((bio)->bi_rw & (1 << BIO_RW_DISCARD))
+#define bio_empty_barrier(bio)	(bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio))
 
 static inline unsigned int bio_cur_sectors(struct bio *bio)
 {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 727886d..e9eb35c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -541,7 +541,7 @@
 #define blk_noretry_request(rq)	((rq)->cmd_flags & REQ_FAILFAST)
 #define blk_rq_started(rq)	((rq)->cmd_flags & REQ_STARTED)
 
-#define blk_account_rq(rq)	(blk_rq_started(rq) && blk_fs_request(rq))
+#define blk_account_rq(rq)	(blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq)))
 
 #define blk_pm_suspend_request(rq)	((rq)->cmd_type == REQ_TYPE_PM_SUSPEND)
 #define blk_pm_resume_request(rq)	((rq)->cmd_type == REQ_TYPE_PM_RESUME)
@@ -598,7 +598,8 @@
 #define RQ_NOMERGE_FLAGS	\
 	(REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
 #define rq_mergeable(rq)	\
-	(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq)))
+	(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
+	 (blk_discard_rq(rq) || blk_fs_request((rq))))
 
 /*
  * q->prep_rq_fn return values
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 88358ca..860689f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -87,7 +87,8 @@
 #define WRITE_SYNC	(WRITE | (1 << BIO_RW_SYNC))
 #define SWRITE_SYNC	(SWRITE | (1 << BIO_RW_SYNC))
 #define WRITE_BARRIER	(WRITE | (1 << BIO_RW_BARRIER))
-#define WRITE_DISCARD	(WRITE | (1 << BIO_RW_DISCARD))
+#define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD)
+#define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER))
 
 #define SEL_IN		1
 #define SEL_OUT		2