Btrfs: Transaction commit: don't use filemap_fdatawait

After writing out all the remaining btree blocks in the transaction,
the commit code would use filemap_fdatawait to make sure they were all
on disk.  This meant it would also wait for blocks written by other
processes.

The new code walks the list of blocks for this transaction again
and waits only for those required by this transaction.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9902d29..9601b13 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -429,7 +429,7 @@
 	return 0;
 }
 
-static int congested_async(struct btrfs_fs_info *info, int iodone)
+int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
 {
 	int limit = 256 * info->fs_devices->open_devices;
 
@@ -438,9 +438,6 @@
 	if (atomic_read(&info->nr_async_submits) > limit)
 		return 1;
 
-	limit = 8192 * info->fs_devices->open_devices;
-	if (iodone)
-		limit = (limit * 3) / 2;
 	return atomic_read(&info->nr_async_bios) > limit;
 }
 
@@ -454,7 +451,7 @@
 	atomic_dec(&fs_info->nr_async_submits);
 
 	if ((async->bio->bi_rw & (1 << BIO_RW)) &&
-	    !congested_async(fs_info, 1)) {
+	    !btrfs_congested_async(fs_info, 1)) {
 		clear_bdi_congested(&fs_info->bdi, WRITE);
 	}
 	async->submit_bio_hook(async->inode, async->rw, async->bio,
@@ -963,7 +960,7 @@
 	struct backing_dev_info *bdi;
 
 	if ((bdi_bits & (1 << BDI_write_congested)) &&
-	    congested_async(info, 0))
+	    btrfs_congested_async(info, 0))
 		return 1;
 
 	list_for_each(cur, &info->fs_devices->devices) {
@@ -1844,7 +1841,7 @@
 	struct extent_io_tree *tree;
 	u64 num_dirty;
 	u64 start = 0;
-	unsigned long thresh = 16 * 1024 * 1024;
+	unsigned long thresh = 2 * 1024 * 1024;
 	tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
 
 	if (current_is_pdflush())
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 353c3c5..e904a69 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -72,4 +72,5 @@
 int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 			int rw, struct bio *bio, int mirror_num,
 			extent_submit_bio_hook_t *submit_bio_hook);
+int btrfs_congested_async(struct btrfs_fs_info *info, int iodone);
 #endif
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 9d3d08e..6bcb087 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -303,12 +303,12 @@
 				     struct btrfs_root *root)
 {
 	int ret;
-	int err;
+	int err = 0;
 	int werr = 0;
 	struct extent_io_tree *dirty_pages;
 	struct page *page;
 	struct inode *btree_inode = root->fs_info->btree_inode;
-	u64 start;
+	u64 start = 0;
 	u64 end;
 	unsigned long index;
 
@@ -317,12 +317,15 @@
 	}
 	dirty_pages = &trans->transaction->dirty_pages;
 	while(1) {
-		ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
+		ret = find_first_extent_bit(dirty_pages, start, &start, &end,
 					    EXTENT_DIRTY);
 		if (ret)
 			break;
-		clear_extent_dirty(dirty_pages, start, end, GFP_NOFS);
 		while(start <= end) {
+			if (btrfs_congested_async(root->fs_info, 0))
+				congestion_wait(WRITE, HZ/10);
+			cond_resched();
+
 			index = start >> PAGE_CACHE_SHIFT;
 			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
 			page = find_lock_page(btree_inode->i_mapping, index);
@@ -343,7 +346,30 @@
 			page_cache_release(page);
 		}
 	}
-	err = filemap_fdatawait(btree_inode->i_mapping);
+	while(1) {
+		ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
+					    EXTENT_DIRTY);
+		if (ret)
+			break;
+
+		clear_extent_dirty(dirty_pages, start, end, GFP_NOFS);
+		while(start <= end) {
+			index = start >> PAGE_CACHE_SHIFT;
+			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
+			page = find_get_page(btree_inode->i_mapping, index);
+			if (!page)
+				continue;
+			if (PageDirty(page)) {
+				lock_page(page);
+				err = write_one_page(page, 0);
+				if (err)
+					werr = err;
+			}
+			wait_on_page_writeback(page);
+			page_cache_release(page);
+			cond_resched();
+		}
+	}
 	if (err)
 		werr = err;
 	return werr;