block, cfq: move icq creation and rq->elv.icq association to block core

Now the block layer knows everything necessary to create and associate
icq's with requests.  Move ioc_create_icq() to blk-ioc.c and update
get_request() such that, if elevator_type->icq_size is set, requests
are automatically associated with their matching icq's before
elv_set_request().  The io_context reference is also managed by the
block core on request alloc/free.
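
The resulting ownership rule: a request allocated with REQ_ELVPRIV
pins its io_context through rq->elv.icq for the request's whole
lifetime.  A condensed view of the alloc/free pairing (names taken
from the patch below; error handling elided):

	/*
	 * alloc side: get_request() looks up the icq under queue_lock,
	 * creates it after dropping the lock, and hands it in.
	 */
	rq->elv.icq = icq;
	elv_set_request(q, rq, gfp_mask);
	if (icq)
		get_io_context(icq->ioc);  /* rq->elv.icq pins the ioc */

	/*
	 * free side: blk_free_request() drops the reference along with
	 * the elevator private data.
	 */
	elv_put_request(q, rq);
	if (rq->elv.icq)
		put_io_context(rq->elv.icq->ioc, q);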

* Only the ioprio/cgroup changed handling remains from cfq_get_cic();
  it is collapsed into cfq_set_request().

* This removes queue kicking on icq allocation failure (for now).  As
  icq allocation failure is rare and the only effect queue kicking
  achieved was possibly accelerating queue processing, this change
  shouldn't be noticeable.

  There is a larger underlying problem.  Unlike request allocation,
  icq allocation is not guaranteed to eventually succeed after
  retries.  The number of icq's is unbounded, so a mempool can't be
  the solution either.  This effectively adds an allocation dependency
  on the memory free path and thus the possibility of deadlock.

  This usually wouldn't happen because icq allocation is not a hot
  path and, even when the condition triggers, it's highly unlikely
  that none of the writeback workers already has an icq.

  However, this is still possible, especially if the elevator is being
  switched under high memory pressure, so we'd better get it fixed.
  Probably the only solution is to bypass the elevator and append to
  the dispatch queue directly on any elevator allocation failure (see
  the sketch after this list).

* Comment added to explain how icq's are managed and synchronized.
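
As a rough sketch of the bypass idea above (a possible future fix, not
part of this patch): on icq allocation failure, get_request() could
drop REQ_ELVPRIV and hand out a request that skips the elevator
entirely, e.g.

	/*
	 * Hypothetical fallback after ioc_create_icq() fails.  Note
	 * that rl->elvpriv was bumped under queue_lock earlier.
	 */
	if (unlikely(et->icq_cache && !icq)) {
		icq = ioc_create_icq(q, gfp_mask);
		if (!icq) {
			rw_flags &= ~REQ_ELVPRIV;	/* bypass elevator */
			spin_lock_irq(q->queue_lock);
			rl->elvpriv--;
			spin_unlock_irq(q->queue_lock);
		}
	}
	rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);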

This completes the cleanup of the io_context interface.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/block/blk-core.c b/block/blk-core.c
index 3c26c7f..8fbdac7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -640,13 +640,18 @@
 
 static inline void blk_free_request(struct request_queue *q, struct request *rq)
 {
-	if (rq->cmd_flags & REQ_ELVPRIV)
+	if (rq->cmd_flags & REQ_ELVPRIV) {
 		elv_put_request(q, rq);
+		if (rq->elv.icq)
+			put_io_context(rq->elv.icq->ioc, q);
+	}
+
 	mempool_free(rq, q->rq.rq_pool);
 }
 
 static struct request *
-blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
+blk_alloc_request(struct request_queue *q, struct io_cq *icq,
+		  unsigned int flags, gfp_t gfp_mask)
 {
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
@@ -657,10 +662,15 @@
 
 	rq->cmd_flags = flags | REQ_ALLOCED;
 
-	if ((flags & REQ_ELVPRIV) &&
-	    unlikely(elv_set_request(q, rq, gfp_mask))) {
-		mempool_free(rq, q->rq.rq_pool);
-		return NULL;
+	if (flags & REQ_ELVPRIV) {
+		rq->elv.icq = icq;
+		if (unlikely(elv_set_request(q, rq, gfp_mask))) {
+			mempool_free(rq, q->rq.rq_pool);
+			return NULL;
+		}
+		/* @rq->elv.icq holds on to io_context until @rq is freed */
+		if (icq)
+			get_io_context(icq->ioc);
 	}
 
 	return rq;
@@ -772,11 +782,14 @@
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
+	struct elevator_type *et;
 	struct io_context *ioc;
+	struct io_cq *icq = NULL;
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
 	bool retried = false;
 	int may_queue;
 retry:
+	et = q->elevator->type;
 	ioc = current->io_context;
 
 	if (unlikely(blk_queue_dead(q)))
@@ -837,17 +850,36 @@
 	rl->count[is_sync]++;
 	rl->starved[is_sync] = 0;
 
+	/*
+	 * Decide whether the new request will be managed by elevator.  If
+	 * so, mark @rw_flags and increment elvpriv.  Non-zero elvpriv will
+	 * prevent the current elevator from being destroyed until the new
+	 * request is freed.  This guarantees icq's won't be destroyed and
+	 * makes creating new ones safe.
+	 *
+	 * Also, lookup icq while holding queue_lock.  If it doesn't exist,
+	 * it will be created after releasing queue_lock.
+	 */
 	if (blk_rq_should_init_elevator(bio) &&
 	    !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) {
 		rw_flags |= REQ_ELVPRIV;
 		rl->elvpriv++;
+		if (et->icq_cache && ioc)
+			icq = ioc_lookup_icq(ioc, q);
 	}
 
 	if (blk_queue_io_stat(q))
 		rw_flags |= REQ_IO_STAT;
 	spin_unlock_irq(q->queue_lock);
 
-	rq = blk_alloc_request(q, rw_flags, gfp_mask);
+	/* create icq if missing */
+	if (unlikely(et->icq_cache && !icq))
+		icq = ioc_create_icq(q, gfp_mask);
+
+	/* rqs are guaranteed to have icq on elv_set_request() if requested */
+	if (likely(!et->icq_cache || icq))
+		rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
+
 	if (unlikely(!rq)) {
 		/*
 		 * Allocation failed presumably due to memory. Undo anything