Btrfs: Add btree locking to the tree defragmentation code

The online btree defragger is simplified and rewritten to use
standard btree searches instead of a walk-up/walk-down mechanism.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index fab851d..1677e4e 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -21,167 +21,26 @@
 #include "disk-io.h"
 #include "print-tree.h"
 #include "transaction.h"
-
-static void reada_defrag(struct btrfs_root *root,
-			 struct extent_buffer *node)
-{
-	int i;
-	u32 nritems;
-	u64 bytenr;
-	u64 gen;
-	u32 blocksize;
-	int ret;
-
-	blocksize = btrfs_level_size(root, btrfs_header_level(node) - 1);
-	nritems = btrfs_header_nritems(node);
-	for (i = 0; i < nritems; i++) {
-		bytenr = btrfs_node_blockptr(node, i);
-		gen = btrfs_node_ptr_generation(node, i);
-		ret = readahead_tree_block(root, bytenr, blocksize, gen);
-		if (ret)
-			break;
-	}
-}
-
-static int defrag_walk_down(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root,
-			    struct btrfs_path *path, int *level,
-			    int cache_only, u64 *last_ret)
-{
-	struct extent_buffer *next;
-	struct extent_buffer *cur;
-	u64 bytenr;
-	u64 ptr_gen;
-	int ret = 0;
-	int is_extent = 0;
-
-	WARN_ON(*level < 0);
-	WARN_ON(*level >= BTRFS_MAX_LEVEL);
-
-	if (root->fs_info->extent_root == root)
-		is_extent = 1;
-
-	if (*level == 1 && cache_only && path->nodes[1] &&
-	    !btrfs_buffer_defrag(path->nodes[1])) {
-		goto out;
-	}
-	while(*level > 0) {
-		WARN_ON(*level < 0);
-		WARN_ON(*level >= BTRFS_MAX_LEVEL);
-		cur = path->nodes[*level];
-
-		if (!cache_only && *level > 1 && path->slots[*level] == 0)
-			reada_defrag(root, cur);
-
-		if (btrfs_header_level(cur) != *level)
-			WARN_ON(1);
-
-		if (path->slots[*level] >=
-		    btrfs_header_nritems(cur))
-			break;
-
-		if (*level == 1) {
-			WARN_ON(btrfs_header_generation(path->nodes[*level]) !=
-							trans->transid);
-			ret = btrfs_realloc_node(trans, root,
-						 path->nodes[*level],
-						 path->slots[*level],
-						 cache_only, last_ret,
-						 &root->defrag_progress);
-			if (is_extent)
-				btrfs_extent_post_op(trans, root);
-
-			break;
-		}
-		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
-		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
-
-		if (cache_only) {
-			next = btrfs_find_tree_block(root, bytenr,
-					   btrfs_level_size(root, *level - 1));
-			if (!next || !btrfs_buffer_uptodate(next, ptr_gen) ||
-			    !btrfs_buffer_defrag(next)) {
-				free_extent_buffer(next);
-				path->slots[*level]++;
-				continue;
-			}
-		} else {
-			next = read_tree_block(root, bytenr,
-				       btrfs_level_size(root, *level - 1),
-				       ptr_gen);
-		}
-		ret = btrfs_cow_block(trans, root, next, path->nodes[*level],
-				      path->slots[*level], &next);
-		BUG_ON(ret);
-		if (is_extent)
-			btrfs_extent_post_op(trans, root);
-
-		WARN_ON(*level <= 0);
-		if (path->nodes[*level-1])
-			free_extent_buffer(path->nodes[*level-1]);
-		path->nodes[*level-1] = next;
-		*level = btrfs_header_level(next);
-		path->slots[*level] = 0;
-	}
-	WARN_ON(*level < 0);
-	WARN_ON(*level >= BTRFS_MAX_LEVEL);
-
-	btrfs_clear_buffer_defrag(path->nodes[*level]);
-out:
-	free_extent_buffer(path->nodes[*level]);
-	path->nodes[*level] = NULL;
-	*level += 1;
-	WARN_ON(ret && ret != -EAGAIN);
-	return ret;
-}
-
-static int defrag_walk_up(struct btrfs_trans_handle *trans,
-			  struct btrfs_root *root,
-			  struct btrfs_path *path, int *level,
-			  int cache_only)
-{
-	int i;
-	int slot;
-	struct extent_buffer *node;
-
-	for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
-		slot = path->slots[i];
-		if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
-			path->slots[i]++;
-			*level = i;
-			node = path->nodes[i];
-			WARN_ON(i == 0);
-			btrfs_node_key_to_cpu(node, &root->defrag_progress,
-					      path->slots[i]);
-			root->defrag_level = i;
-			return 0;
-		} else {
-			btrfs_clear_buffer_defrag(path->nodes[*level]);
-			free_extent_buffer(path->nodes[*level]);
-			path->nodes[*level] = NULL;
-			*level = i + 1;
-		}
-	}
-	return 1;
-}
+#include "locking.h"
 
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root, int cache_only)
 {
 	struct btrfs_path *path = NULL;
-	struct extent_buffer *tmp;
+	struct btrfs_key key;
 	int ret = 0;
 	int wret;
 	int level;
 	int orig_level;
 	int i;
 	int is_extent = 0;
+	int next_key_ret = 0;
 	u64 last_ret = 0;
 
-	if (root->fs_info->extent_root == root)
+	if (root->fs_info->extent_root == root) {
+		mutex_lock(&root->fs_info->alloc_mutex);
 		is_extent = 1;
-
-	goto out;
+	}
 
 	if (root->ref_cows == 0 && !is_extent)
 		goto out;
@@ -200,67 +59,63 @@
 		goto out;
 	}
 	if (root->defrag_progress.objectid == 0) {
+		struct extent_buffer *root_node;
 		u32 nritems;
 
-		nritems = btrfs_header_nritems(root->node);
+		root_node = btrfs_lock_root_node(root);
+		nritems = btrfs_header_nritems(root_node);
 		root->defrag_max.objectid = 0;
 		/* from above we know this is not a leaf */
-		btrfs_node_key_to_cpu(root->node, &root->defrag_max,
+		btrfs_node_key_to_cpu(root_node, &root->defrag_max,
 				      nritems - 1);
-		extent_buffer_get(root->node);
-		ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp);
-		BUG_ON(ret);
-		path->nodes[level] = root->node;
-		path->slots[level] = 0;
-		if (is_extent)
-			btrfs_extent_post_op(trans, root);
+		btrfs_tree_unlock(root_node);
+		free_extent_buffer(root_node);
+		memset(&key, 0, sizeof(key));
 	} else {
-		level = root->defrag_level;
-		path->lowest_level = level;
-		wret = btrfs_search_slot(trans, root, &root->defrag_progress,
-					 path, 0, 1);
-
-		if (is_extent)
-			btrfs_extent_post_op(trans, root);
-
-		if (wret < 0) {
-			ret = wret;
-			goto out;
-		}
-
-		while(level > 0 && !path->nodes[level])
-			level--;
-
-		if (!path->nodes[level]) {
-			ret = 0;
-			goto out;
-		}
+		memcpy(&key, &root->defrag_progress, sizeof(key));
 	}
 
-	while(1) {
-		wret = defrag_walk_down(trans, root, path, &level, cache_only,
-					&last_ret);
-		if (wret > 0)
-			break;
-		if (wret < 0)
-			ret = wret;
+	path->lowest_level = 1;
+	path->keep_locks = 1;
+	wret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 
-		wret = defrag_walk_up(trans, root, path, &level, cache_only);
-		if (wret > 0)
-			break;
-		if (wret < 0)
-			ret = wret;
-		else
-			ret = -EAGAIN;
-		break;
+	if (wret < 0) {
+		ret = wret;
+		goto out;
 	}
-	for (i = 0; i <= orig_level; i++) {
+	if (!path->nodes[1]) {
+		ret = 0;
+		goto out;
+	}
+	path->slots[1] = btrfs_header_nritems(path->nodes[1]);
+	next_key_ret = btrfs_find_next_key(root, path, &key, 1);
+	ret = btrfs_realloc_node(trans, root,
+				 path->nodes[1], 0,
+				 cache_only, &last_ret,
+				 &root->defrag_progress);
+	WARN_ON(ret && ret != -EAGAIN);
+	if (next_key_ret == 0) {
+		memcpy(&root->defrag_progress, &key, sizeof(key));
+		ret = -EAGAIN;
+	}
+
+	for (i = 1; i < BTRFS_MAX_LEVEL; i++) {
+		if (path->locks[i]) {
+			btrfs_tree_unlock(path->nodes[i]);
+			path->locks[i] = 0;
+		}
 		if (path->nodes[i]) {
 			free_extent_buffer(path->nodes[i]);
 			path->nodes[i] = NULL;
 		}
 	}
+	if (is_extent)
+		btrfs_extent_post_op(trans, root);
+
 out:
+	if (is_extent)
+		mutex_unlock(&root->fs_info->alloc_mutex);
+
 	if (path)
 		btrfs_free_path(path);
 	if (ret == -EAGAIN) {