Btrfs: Defrag only leaves, and only when the parent node has a single objectid

This allows us to defrag huge directories, while skipping the expensive
defrag in the more common case, where it does not help as much.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 984f474..74fec6b 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -16,6 +16,7 @@
  * Boston, MA 021110-1307, USA.
  */
 
+#include <linux/sched.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -190,7 +191,8 @@
 
 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root, struct extent_buffer *parent,
-		       int cache_only, u64 *last_ret)
+		       int start_slot, int cache_only, u64 *last_ret,
+		       struct btrfs_key *progress)
 {
 	struct extent_buffer *cur;
 	struct extent_buffer *tmp;
@@ -199,7 +201,6 @@
 	u64 last_block = 0;
 	u64 other;
 	u32 parent_nritems;
-	int start_slot;
 	int end_slot;
 	int i;
 	int err = 0;
@@ -221,15 +222,24 @@
 
 	parent_nritems = btrfs_header_nritems(parent);
 	blocksize = btrfs_level_size(root, parent_level - 1);
-
-	start_slot = 0;
 	end_slot = parent_nritems;
 
 	if (parent_nritems == 1)
 		return 0;
 
+	if (root != root->fs_info->extent_root) {
+		struct btrfs_key first_key;
+		struct btrfs_key last_key;
+
+		btrfs_node_key_to_cpu(parent, &first_key, 0);
+		btrfs_node_key_to_cpu(parent, &last_key, parent_nritems - 1);
+		if (first_key.objectid != last_key.objectid)
+			return 0;
+	}
+
 	for (i = start_slot; i < end_slot; i++) {
 		int close = 1;
+
 		blocknr = btrfs_node_blockptr(parent, i);
 		if (last_block == 0)
 			last_block = blocknr;
@@ -898,7 +908,7 @@
 	u32 blocksize;
 	u32 nscan = 0;
 
-	if (level == 0)
+	if (level != 1)
 		return;
 
 	if (!path->nodes[level])
@@ -2370,7 +2380,7 @@
 		}
 
 		/* delete the leaf if it is mostly empty */
-		if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
+		if (0 && used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
 			/* push_leaf_left fixes the path.
 			 * make sure the path still points to our leaf
 			 * for possible call to del_ptr below
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 1af0a96..37bccb1 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -913,7 +913,8 @@
 		      ins_len, int cow);
 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root, struct extent_buffer *parent,
-		       int cache_only, u64 *last_ret);
+		       int start_slot, int cache_only, u64 *last_ret,
+		       struct btrfs_key *progress);
 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p);
 struct btrfs_path *btrfs_alloc_path(void);
 void btrfs_free_path(struct btrfs_path *p);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 67e4aca..bdfe05c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -353,7 +353,6 @@
 
 	if (root->defrag_running)
 		return 0;
-
 	trans = btrfs_start_transaction(root, 1);
 	while (1) {
 		root->defrag_running = 1;
@@ -361,7 +360,6 @@
 		nr = trans->blocks_used;
 		btrfs_end_transaction(trans, root);
 		mutex_unlock(&info->fs_mutex);
-
 		btrfs_btree_balance_dirty(info->tree_root, nr);
 		cond_resched();
 
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index f86eccf..b02355a 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -76,7 +76,9 @@
 		if (*level == 1) {
 			ret = btrfs_realloc_node(trans, root,
 						 path->nodes[*level],
-						 cache_only, last_ret);
+						 path->slots[*level],
+						 cache_only, last_ret,
+						 &root->defrag_progress);
 			if (is_extent)
 				btrfs_extent_post_op(trans, root);
 
@@ -100,10 +102,6 @@
 		ret = btrfs_cow_block(trans, root, next, path->nodes[*level],
 				      path->slots[*level], &next);
 		BUG_ON(ret);
-		ret = btrfs_realloc_node(trans, root, next, cache_only,
-					 last_ret);
-		BUG_ON(ret);
-
 		if (is_extent)
 			btrfs_extent_post_op(trans, root);
 
@@ -122,8 +120,8 @@
 	free_extent_buffer(path->nodes[*level]);
 	path->nodes[*level] = NULL;
 	*level += 1;
-	WARN_ON(ret);
-	return 0;
+	WARN_ON(ret && ret != -EAGAIN);
+	return ret;
 }
 
 static int defrag_walk_up(struct btrfs_trans_handle *trans,
@@ -147,27 +145,6 @@
 			root->defrag_level = i;
 			return 0;
 		} else {
-			if (*level > 1 && path->nodes[*level] != root->node &&
-			    btrfs_buffer_defrag(path->nodes[*level])) {
-				struct extent_buffer *next;
-				u64 last;
-				int ret;
-				ret = btrfs_cow_block(trans, root,
-						      path->nodes[*level],
-						      path->nodes[*level + 1],
-						      path->slots[*level + 1],
-						      &next);
-				BUG_ON(ret);
-				path->nodes[*level] = next;
-				last = next->start;
-				ret = btrfs_realloc_node(trans, root, next,
-							 cache_only, &last);
-				BUG_ON(ret);
-
-				if (root == root->fs_info->extent_root)
-					btrfs_extent_post_op(trans, root);
-			}
-
 			btrfs_clear_buffer_defrag(path->nodes[*level]);
 			free_extent_buffer(path->nodes[*level]);
 			path->nodes[*level] = NULL;
@@ -211,9 +188,6 @@
 		extent_buffer_get(root->node);
 		ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp);
 		BUG_ON(ret);
-		ret = btrfs_realloc_node(trans, root, root->node, cache_only,
-					 &last_ret);
-		BUG_ON(ret);
 		path->nodes[level] = root->node;
 		path->slots[level] = 0;
 		if (is_extent)