Btrfs: extent_map and data=ordered fixes for space balancing

* Add an EXTENT_BOUNDARY state bit to keep the writepage code
from merging data extents that are in the process of being
relocated.  This allows us to do accounting for them properly.

* The balancing code relocates data extents independent of the underlying
inode.  The extent_map code was modified to properly account for
things moving around (invalidating extent_map caches in the inode).

* Don't take the drop_mutex in the create_subvol ioctl.  It isn't
required.

* Fix walking of the ordered extent list to avoid races with sys_unlink

* Change the lock ordering rules.  Transaction start goes outside
the drop_mutex.  This allows btrfs_commit_transaction to directly
drop the relocation trees.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index da6d43e..951eacf 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -309,7 +309,6 @@
 {
 	struct list_head splice;
 	struct list_head *cur;
-	struct list_head *tmp;
 	struct btrfs_ordered_extent *ordered;
 	struct inode *inode;
 
@@ -317,37 +316,38 @@
 
 	spin_lock(&root->fs_info->ordered_extent_lock);
 	list_splice_init(&root->fs_info->ordered_extents, &splice);
-	list_for_each_safe(cur, tmp, &splice) {
+	while (!list_empty(&splice)) {
 		cur = splice.next;
 		ordered = list_entry(cur, struct btrfs_ordered_extent,
 				     root_extent_list);
 		if (nocow_only &&
 		    !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
+			list_move(&ordered->root_extent_list,
+				  &root->fs_info->ordered_extents);
 			cond_resched_lock(&root->fs_info->ordered_extent_lock);
 			continue;
 		}
 
 		list_del_init(&ordered->root_extent_list);
 		atomic_inc(&ordered->refs);
-		inode = ordered->inode;
 
 		/*
-		 * the inode can't go away until all the pages are gone
-		 * and the pages won't go away while there is still
-		 * an ordered extent and the ordered extent won't go
-		 * away until it is off this list.  So, we can safely
-		 * increment i_count here and call iput later
+		 * the inode may be getting freed (in sys_unlink path).
 		 */
-		atomic_inc(&inode->i_count);
+		inode = igrab(ordered->inode);
+
 		spin_unlock(&root->fs_info->ordered_extent_lock);
 
-		btrfs_start_ordered_extent(inode, ordered, 1);
-		btrfs_put_ordered_extent(ordered);
-		iput(inode);
+		if (inode) {
+			btrfs_start_ordered_extent(inode, ordered, 1);
+			btrfs_put_ordered_extent(ordered);
+			iput(inode);
+		} else {
+			btrfs_put_ordered_extent(ordered);
+		}
 
 		spin_lock(&root->fs_info->ordered_extent_lock);
 	}
-	list_splice_init(&splice, &root->fs_info->ordered_extents);
 	spin_unlock(&root->fs_info->ordered_extent_lock);
 	return 0;
 }