writeback: make writeback_control track the inode being written back

Currently, for cgroup writeback, the IO submission paths directly
associate the bio's with the blkcg from inode_to_wb_blkcg_css();
however, it'd be necessary to keep more writeback context to implement
foreign inode writeback detection.  wbc (writeback_control) is the
natural fit for the extra context - it persists throughout the
writeback of each inode and is passed all the way down to IO
submission paths.

This patch adds wbc_attach_and_unlock_inode(), wbc_detach_inode(), and
wbc_attach_fdatawrite_inode() which are used to associate wbc with the
inode being written back.  IO submission paths now use wbc_init_bio()
instead of directly associating bio's with blkcg themselves.  This
leaves inode_to_wb_blkcg_css() without any users, so it is removed.

wbc currently only tracks the associated wb (bdi_writeback).  Future
patches will add more for foreign inode detection.  The association is
established under i_lock which will be depended upon when migrating
foreign inodes to other wb's.

As the inode to wb association currently never changes once
established, going through wbc when initializing bio's doesn't cause
any behavior changes.

v2: submit_blk_blkcg() now checks whether the wbc is associated with a
    wb before dereferencing it.  This can happen when pageout() is
    writing pages directly without going through the usual writeback
    path.  As the pageout() path is single-threaded, we don't want it
    to be blocked behind a slow cgroup and ultimately want it to
    delegate actual writing to the usual writeback path.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jan Kara <jack@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
diff --git a/fs/buffer.c b/fs/buffer.c
index b85e941..d883c79 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -45,9 +45,9 @@
 #include <trace/events/block.h>
 
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
-static int submit_bh_blkcg(int rw, struct buffer_head *bh,
-			   unsigned long bio_flags,
-			   struct cgroup_subsys_state *blkcg_css);
+static int submit_bh_wbc(int rw, struct buffer_head *bh,
+			 unsigned long bio_flags,
+			 struct writeback_control *wbc);
 
 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
 
@@ -1709,7 +1709,6 @@
 	unsigned int blocksize, bbits;
 	int nr_underway = 0;
 	int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
-	struct cgroup_subsys_state *blkcg_css = inode_to_wb_blkcg_css(inode);
 
 	head = create_page_buffers(page, inode,
 					(1 << BH_Dirty)|(1 << BH_Uptodate));
@@ -1798,7 +1797,7 @@
 	do {
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
-			submit_bh_blkcg(write_op, bh, 0, blkcg_css);
+			submit_bh_wbc(write_op, bh, 0, wbc);
 			nr_underway++;
 		}
 		bh = next;
@@ -1852,7 +1851,7 @@
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
 			clear_buffer_dirty(bh);
-			submit_bh_blkcg(write_op, bh, 0, blkcg_css);
+			submit_bh_wbc(write_op, bh, 0, wbc);
 			nr_underway++;
 		}
 		bh = next;
@@ -3017,11 +3016,11 @@
 	}
 }
 
-static int submit_bh_blkcg(int rw, struct buffer_head *bh,
-			   unsigned long bio_flags,
-			   struct cgroup_subsys_state *blkcg_css)
+static int submit_bh_wbc(int rw, struct buffer_head *bh,
+			 unsigned long bio_flags, struct writeback_control *wbc)
 {
 	struct bio *bio;
+	int ret = 0;
 
 	BUG_ON(!buffer_locked(bh));
 	BUG_ON(!buffer_mapped(bh));
@@ -3041,8 +3040,8 @@
 	 */
 	bio = bio_alloc(GFP_NOIO, 1);
 
-	if (blkcg_css)
-		bio_associate_blkcg(bio, blkcg_css);
+	if (wbc)
+		wbc_init_bio(wbc, bio);
 
 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio->bi_bdev = bh->b_bdev;
@@ -3071,13 +3070,13 @@
 
 int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
 {
-	return submit_bh_blkcg(rw, bh, bio_flags, NULL);
+	return submit_bh_wbc(rw, bh, bio_flags, NULL);
 }
 EXPORT_SYMBOL_GPL(_submit_bh);
 
 int submit_bh(int rw, struct buffer_head *bh)
 {
-	return submit_bh_blkcg(rw, bh, 0, NULL);
+	return submit_bh_wbc(rw, bh, 0, NULL);
 }
 EXPORT_SYMBOL(submit_bh);
 
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index cf6ccfb..755e8ef 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -245,6 +245,37 @@
 }
 
 /**
+ * wbc_attach_and_unlock_inode - associate wbc with target inode and unlock it
+ * @wbc: writeback_control of interest
+ * @inode: target inode
+ *
+ * The caller holds @inode's i_lock and is about to write @inode back under
+ * @wbc.  Record @inode's wb in @wbc->wb, take a reference on it with wb_get()
+ * and release the i_lock.  wbc_detach_inode() should be called on writeback
+ * completion.  This is used to track the cgroup writeback context.
+ */
+void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
+				 struct inode *inode)
+{
+	wbc->wb = inode_to_wb(inode);
+	wb_get(wbc->wb);
+	spin_unlock(&inode->i_lock);
+}
+
+/**
+ * wbc_detach_inode - disassociate wbc from its target inode
+ * @wbc: writeback_control of interest
+ *
+ * Undo wbc_attach_and_unlock_inode() once a writeback attempt of an inode
+ * finishes: wb_put() @wbc->wb and clear it.  Can be called under any context.
+ */
+void wbc_detach_inode(struct writeback_control *wbc)
+{
+	wb_put(wbc->wb);
+	wbc->wb = NULL;
+}
+
+/**
  * inode_congested - test whether an inode is congested
  * @inode: inode to test for congestion
  * @cong_bits: mask of WB_[a]sync_congested bits to test
@@ -877,10 +908,11 @@
 	     !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
 		goto out;
 	inode->i_state |= I_SYNC;
-	spin_unlock(&inode->i_lock);
+	wbc_attach_and_unlock_inode(wbc, inode);
 
 	ret = __writeback_single_inode(inode, wbc);
 
+	wbc_detach_inode(wbc);
 	spin_lock(&wb->list_lock);
 	spin_lock(&inode->i_lock);
 	/*
@@ -1013,7 +1045,7 @@
 			continue;
 		}
 		inode->i_state |= I_SYNC;
-		spin_unlock(&inode->i_lock);
+		wbc_attach_and_unlock_inode(&wbc, inode);
 
 		write_chunk = writeback_chunk_size(wb, work);
 		wbc.nr_to_write = write_chunk;
@@ -1025,6 +1057,7 @@
 		 */
 		__writeback_single_inode(inode, &wbc);
 
+		wbc_detach_inode(&wbc);
 		work->nr_pages -= write_chunk - wbc.nr_to_write;
 		wrote += write_chunk - wbc.nr_to_write;
 		spin_lock(&wb->list_lock);
diff --git a/fs/mpage.c b/fs/mpage.c
index a3ccb0b..388fde6 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -606,7 +606,7 @@
 		if (bio == NULL)
 			goto confused;
 
-		bio_associate_blkcg(bio, inode_to_wb_blkcg_css(inode));
+		wbc_init_bio(wbc, bio);
 	}
 
 	/*