cfq-iosched: use call_rcu() instead of doing grace period stall on queue exit

After the merge of the IO controller patches, booting on my megaraid
box ran much slower. Vivek Goyal traced it down to megaraid discovery
creating tons of devices, each of which waits for an RCU grace period
when its queue is later killed (if no device is found).

So let's use call_rcu() to batch these deferred frees, instead of taking
the grace period hit for each one.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 78f4829..3815f97 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -287,6 +287,7 @@
 
 	/* List of cfq groups being managed on this device*/
 	struct hlist_head cfqg_list;
+	struct rcu_head rcu;
 };
 
 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
@@ -3601,6 +3602,11 @@
 		cfq_put_queue(cfqd->async_idle_cfqq);
 }
 
+static void cfq_cfqd_free(struct rcu_head *head)
+{
+	kfree(container_of(head, struct cfq_data, rcu));
+}
+
 static void cfq_exit_queue(struct elevator_queue *e)
 {
 	struct cfq_data *cfqd = e->elevator_data;
@@ -3630,8 +3636,7 @@
 	cfq_shutdown_timer_wq(cfqd);
 
 	/* Wait for cfqg->blkg->key accessors to exit their grace periods. */
-	synchronize_rcu();
-	kfree(cfqd);
+	call_rcu(&cfqd->rcu, cfq_cfqd_free);
 }
 
 static void *cfq_init_queue(struct request_queue *q)
@@ -3706,6 +3711,7 @@
 	cfqd->cfq_group_isolation = 0;
 	cfqd->hw_tag = -1;
 	cfqd->last_end_sync_rq = jiffies;
+	INIT_RCU_HEAD(&cfqd->rcu);
 	return cfqd;
 }