cfq-iosched: move cfq_group determination from cfq_find_alloc_queue() to cfq_get_queue()

This is necessary for making async cfq_cgroups per-cfq_group instead
of per-cfq_data.  While this change makes cfq_get_queue() perform RCU
locking and look up cfq_group even when it reuses async queue, the
extra overhead is extremely unlikely to be noticeable given that this
is already sitting behind cic->cfqq[] cache and the overall cost of
cfq operation.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Arianna Avanzini <avanzini.arianna@gmail.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 146b03d..f3ea8a1 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3641,21 +3641,10 @@
 #endif  /* CONFIG_CFQ_GROUP_IOSCHED */
 
 static struct cfq_queue *
-cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
-		     struct bio *bio)
+cfq_find_alloc_queue(struct cfq_data *cfqd, struct cfq_group *cfqg, bool is_sync,
+		     struct cfq_io_cq *cic, struct bio *bio)
 {
-	struct blkcg *blkcg;
 	struct cfq_queue *cfqq;
-	struct cfq_group *cfqg;
-
-	rcu_read_lock();
-
-	blkcg = bio_blkcg(bio);
-	cfqg = cfq_lookup_create_cfqg(cfqd, blkcg);
-	if (!cfqg) {
-		cfqq = &cfqd->oom_cfqq;
-		goto out;
-	}
 
 	cfqq = cic_to_cfqq(cic, is_sync);
 
@@ -3675,8 +3664,6 @@
 		} else
 			cfqq = &cfqd->oom_cfqq;
 	}
-out:
-	rcu_read_unlock();
 	return cfqq;
 }
 
@@ -3706,6 +3693,14 @@
 	int ioprio = IOPRIO_PRIO_DATA(cic->ioprio);
 	struct cfq_queue **async_cfqq;
 	struct cfq_queue *cfqq;
+	struct cfq_group *cfqg;
+
+	rcu_read_lock();
+	cfqg = cfq_lookup_create_cfqg(cfqd, bio_blkcg(bio));
+	if (!cfqg) {
+		cfqq = &cfqd->oom_cfqq;
+		goto out;
+	}
 
 	if (!is_sync) {
 		if (!ioprio_valid(cic->ioprio)) {
@@ -3719,7 +3714,7 @@
 			goto out;
 	}
 
-	cfqq = cfq_find_alloc_queue(cfqd, is_sync, cic, bio);
+	cfqq = cfq_find_alloc_queue(cfqd, cfqg, is_sync, cic, bio);
 
 	/*
 	 * pin the queue now that it's allocated, scheduler exit will prune it
@@ -3730,6 +3725,7 @@
 	}
 out:
 	cfqq->ref++;
+	rcu_read_unlock();
 	return cfqq;
 }