btrfs_start_transaction: wait for commits in progress to finish

btrfs_commit_transaction has to loop waiting for any writers in the
transaction to finish before it can proceed.  btrfs_start_transaction
should be polite and not join a transaction that is in the process
of being finished off.

A few callers can't wait: basically the ones doing IO that might be
needed to finish the transaction.  For them, btrfs_join_transaction is
added; it joins the running transaction even while a commit has it
marked as blocked.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4ddc8a8..acbce54 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -513,6 +513,7 @@
 	u64 alloc_start;
 	struct btrfs_transaction *running_transaction;
 	wait_queue_head_t transaction_throttle;
+	wait_queue_head_t transaction_wait;
 	struct btrfs_super_block super_copy;
 	struct btrfs_super_block super_for_commit;
 	struct block_device *__bdev;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 66466d1..99bb385 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1291,6 +1291,7 @@
 	mutex_init(&fs_info->cleaner_mutex);
 	mutex_init(&fs_info->volume_mutex);
 	init_waitqueue_head(&fs_info->transaction_throttle);
+	init_waitqueue_head(&fs_info->transaction_wait);
 
 #if 0
 	ret = add_hasher(fs_info, "crc32c");
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 3e4e5c2..d650589 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -251,7 +251,7 @@
 	end_of_last_block = start_pos + num_bytes - 1;
 
 	lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
-	trans = btrfs_start_transaction(root, 1);
+	trans = btrfs_join_transaction(root, 1);
 	if (!trans) {
 		err = -ENOMEM;
 		goto out_unlock;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index baf4601..0a68732 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -116,7 +116,7 @@
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	int ret = 0;
 
-	trans = btrfs_start_transaction(root, 1);
+	trans = btrfs_join_transaction(root, 1);
 	BUG_ON(!trans);
 	btrfs_set_trans_block_group(trans, inode);
 
@@ -502,7 +502,7 @@
 		return 0;
 	}
 
-	trans = btrfs_start_transaction(root, 1);
+	trans = btrfs_join_transaction(root, 1);
 
 	ordered_extent = btrfs_lookup_ordered_extent(inode, start);
 	BUG_ON(!ordered_extent);
@@ -1812,7 +1812,7 @@
 	int ret = 0;
 
 	if (wait) {
-		trans = btrfs_start_transaction(root, 1);
+		trans = btrfs_join_transaction(root, 1);
 		btrfs_set_trans_block_group(trans, inode);
 		ret = btrfs_commit_transaction(trans, root);
 	}
@@ -1830,7 +1830,7 @@
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_trans_handle *trans;
 
-	trans = btrfs_start_transaction(root, 1);
+	trans = btrfs_join_transaction(root, 1);
 	btrfs_set_trans_block_group(trans, inode);
 	btrfs_update_inode(trans, root, inode);
 	btrfs_end_transaction(trans, root);
@@ -2395,7 +2395,7 @@
 				free_extent_map(em);
 				em = NULL;
 				btrfs_release_path(root, path);
-				trans = btrfs_start_transaction(root, 1);
+				trans = btrfs_join_transaction(root, 1);
 				goto again;
 			}
 			write_extent_buffer(leaf, map + pg_offset, ptr,
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 86a5acc..0582390 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -62,6 +62,7 @@
 		init_waitqueue_head(&cur_trans->writer_wait);
 		init_waitqueue_head(&cur_trans->commit_wait);
 		cur_trans->in_commit = 0;
+		cur_trans->blocked = 0;
 		cur_trans->use_count = 1;
 		cur_trans->commit_done = 0;
 		cur_trans->start_time = get_seconds();
@@ -99,14 +100,36 @@
 	return 0;
 }
 
-struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
-						   int num_blocks)
+struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
+					     int num_blocks, int join)
 {
 	struct btrfs_trans_handle *h =
 		kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
+	struct btrfs_transaction *cur_trans;
 	int ret;
 
 	mutex_lock(&root->fs_info->trans_mutex);
+	cur_trans = root->fs_info->running_transaction;
+	if (cur_trans && cur_trans->blocked && !join) {
+		DEFINE_WAIT(wait);
+		cur_trans->use_count++;
+		while(1) {
+			prepare_to_wait(&root->fs_info->transaction_wait, &wait,
+					TASK_UNINTERRUPTIBLE);
+			if (cur_trans->blocked) {
+				mutex_unlock(&root->fs_info->trans_mutex);
+				schedule();
+				mutex_lock(&root->fs_info->trans_mutex);
+				finish_wait(&root->fs_info->transaction_wait,
+					    &wait);
+			} else {
+				finish_wait(&root->fs_info->transaction_wait,
+					    &wait);
+				break;
+			}
+		}
+		put_transaction(cur_trans);
+	}
 	ret = join_transaction(root);
 	BUG_ON(ret);
 
@@ -123,6 +146,17 @@
 	return h;
 }
 
+struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
+						   int num_blocks)
+{
+	return start_transaction(root, num_blocks, 0);
+}
+struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
+						   int num_blocks)
+{
+	return start_transaction(root, num_blocks, 1);
+}
+
 static noinline int wait_for_commit(struct btrfs_root *root,
 				    struct btrfs_transaction *commit)
 {
@@ -156,7 +190,7 @@
 	if (waitqueue_active(&cur_trans->writer_wait))
 		wake_up(&cur_trans->writer_wait);
 
-	if (cur_trans->in_commit && throttle) {
+	if (0 && cur_trans->in_commit && throttle) {
 		DEFINE_WAIT(wait);
 		mutex_unlock(&root->fs_info->trans_mutex);
 		prepare_to_wait(&root->fs_info->transaction_throttle, &wait,
@@ -617,6 +651,7 @@
 
 printk("commit trans %Lu\n", trans->transid);
 	trans->transaction->in_commit = 1;
+	trans->transaction->blocked = 1;
 	cur_trans = trans->transaction;
 	if (cur_trans->list.prev != &root->fs_info->trans_list) {
 		prev_trans = list_entry(cur_trans->list.prev,
@@ -684,7 +719,9 @@
 
 	btrfs_copy_pinned(root, pinned_copy);
 
+	trans->transaction->blocked = 0;
 	wake_up(&root->fs_info->transaction_throttle);
+	wake_up(&root->fs_info->transaction_wait);
 
 	mutex_unlock(&root->fs_info->trans_mutex);
 	ret = btrfs_write_and_wait_transaction(trans, root);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 910350c..11fbdec 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -27,6 +27,7 @@
 	int in_commit;
 	int use_count;
 	int commit_done;
+	int blocked;
 	struct list_head list;
 	struct extent_io_tree dirty_pages;
 	unsigned long start_time;
@@ -75,6 +76,8 @@
 			  struct btrfs_root *root);
 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
 						   int num_blocks);
+struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
+						   int num_blocks);
 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
 				     struct btrfs_root *root);
 int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,