cfq-iosched: fix RCU problem in cfq_cic_lookup()

cfq_cic_lookup() needs to properly protect ioc->ioc_data before
dereferencing it and also exclude updaters of ioc->ioc_data as well.

Also add a number of comments documenting why the existing RCU usage
is OK.

Thanks a lot to "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> for
review and comments!

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 4df3f05..d01b411 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1142,6 +1142,9 @@
 	kmem_cache_free(cfq_pool, cfqq);
 }
 
+/*
+ * Must always be called with the rcu_read_lock() held
+ */
 static void
 __call_for_each_cic(struct io_context *ioc,
 		    void (*func)(struct io_context *, struct cfq_io_context *))
@@ -1197,6 +1200,11 @@
 	cfq_cic_free(cic);
 }
 
+/*
+ * Must be called with rcu_read_lock() held or preemption otherwise disabled.
+ * Only two callers of this - ->dtor() which is called with the rcu_read_lock(),
+ * and ->trim() which is called with the task lock held
+ */
 static void cfq_free_io_context(struct io_context *ioc)
 {
 	/*
@@ -1502,20 +1510,24 @@
 cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 {
 	struct cfq_io_context *cic;
+	unsigned long flags;
 	void *k;
 
 	if (unlikely(!ioc))
 		return NULL;
 
+	rcu_read_lock();
+
 	/*
 	 * we maintain a last-hit cache, to avoid browsing over the tree
 	 */
 	cic = rcu_dereference(ioc->ioc_data);
-	if (cic && cic->key == cfqd)
+	if (cic && cic->key == cfqd) {
+		rcu_read_unlock();
 		return cic;
+	}
 
 	do {
-		rcu_read_lock();
 		cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd);
 		rcu_read_unlock();
 		if (!cic)
@@ -1524,10 +1536,13 @@
 		k = cic->key;
 		if (unlikely(!k)) {
 			cfq_drop_dead_cic(cfqd, ioc, cic);
+			rcu_read_lock();
 			continue;
 		}
 
+		spin_lock_irqsave(&ioc->lock, flags);
 		rcu_assign_pointer(ioc->ioc_data, cic);
+		spin_unlock_irqrestore(&ioc->lock, flags);
 		break;
 	} while (1);
 
@@ -2134,6 +2149,10 @@
 
 static void cfq_slab_kill(void)
 {
+	/*
+	 * Caller already ensured that pending RCU callbacks are completed,
+	 * so we should have no busy allocations at this point.
+	 */
 	if (cfq_pool)
 		kmem_cache_destroy(cfq_pool);
 	if (cfq_ioc_pool)
@@ -2292,6 +2311,11 @@
 	ioc_gone = &all_gone;
 	/* ioc_gone's update must be visible before reading ioc_count */
 	smp_wmb();
+
+	/*
+	 * this also protects us from entering cfq_slab_kill() with
+	 * pending RCU callbacks
+	 */
 	if (elv_ioc_count_read(ioc_count))
 		wait_for_completion(ioc_gone);
 	cfq_slab_kill();