Btrfs: Add locking around volume management (device add/remove/balance)

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c6759fc..bbf9bf3 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1251,10 +1251,6 @@
 	WARN_ON(p->nodes[0] != NULL);
 	WARN_ON(cow && root == root->fs_info->extent_root &&
 		!mutex_is_locked(&root->fs_info->alloc_mutex));
-	WARN_ON(root == root->fs_info->chunk_root &&
-		!mutex_is_locked(&root->fs_info->chunk_mutex));
-	WARN_ON(root == root->fs_info->dev_root &&
-		!mutex_is_locked(&root->fs_info->chunk_mutex));
 	if (ins_len < 0)
 		lowest_unlock = 2;
 again:
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a287964..f3783db 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -523,6 +523,7 @@
 	struct mutex alloc_mutex;
 	struct mutex chunk_mutex;
 	struct mutex drop_mutex;
+	struct mutex volume_mutex;
 	struct list_head trans_list;
 	struct list_head hashers;
 	struct list_head dead_roots;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4cdc0b6..8f4c400 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1287,6 +1287,7 @@
 	mutex_init(&fs_info->chunk_mutex);
 	mutex_init(&fs_info->transaction_kthread_mutex);
 	mutex_init(&fs_info->cleaner_mutex);
+	mutex_init(&fs_info->volume_mutex);
 
 #if 0
 	ret = add_hasher(fs_info, "crc32c");
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5e0857f..8ebfa6b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -245,6 +245,7 @@
 	u64 search_start = *start_ret;
 	int wrapped = 0;
 
+	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
 	total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
 	free_space_cache = &root->fs_info->free_space_cache;
 
@@ -1242,6 +1243,7 @@
 	u64 start;
 	u64 end;
 
+	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
 	while(total) {
 		cache = btrfs_lookup_block_group(info, bytenr);
 		if (!cache) {
@@ -1297,6 +1299,7 @@
 	struct btrfs_block_group_cache *cache;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 
+	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
 	if (pin) {
 		set_extent_dirty(&fs_info->pinned_extents,
 				bytenr, bytenr + num - 1, GFP_NOFS);
@@ -1391,6 +1394,7 @@
 	int level;
 	int err = 0;
 
+	WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
 	btrfs_set_stack_extent_refs(&extent_item, 1);
 	btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
 	path = btrfs_alloc_path();
@@ -1437,6 +1441,7 @@
 {
 	int err = 0;
 
+	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
 	if (!pending) {
 		struct extent_buffer *buf;
 		buf = btrfs_find_tree_block(root, bytenr, num_bytes);
@@ -1490,6 +1495,7 @@
 	struct btrfs_extent_item *ei;
 	u32 refs;
 
+	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
 	key.objectid = bytenr;
 	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
 	key.offset = num_bytes;
@@ -1619,6 +1625,7 @@
 	struct extent_io_tree *pending_del;
 	struct extent_io_tree *pinned_extents;
 
+	WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
 	pending_del = &extent_root->fs_info->pending_del;
 	pinned_extents = &extent_root->fs_info->pinned_extents;
 
@@ -2428,6 +2435,10 @@
 		btrfs_node_key(node, &found_key, path->slots[level]);
 		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
 			       sizeof(found_key)));
+		/*
+		 * unlock our path, this is safe because only this
+		 * function is allowed to delete this snapshot
+		 */
 		for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
 			if (path->nodes[i] && path->locks[i]) {
 				path->locks[i] = 0;
@@ -2611,7 +2622,6 @@
 	u64 root_search_start = BTRFS_FS_TREE_OBJECTID;
 	u64 found_bytenr;
 	int ret;
-	int i;
 
 	root_location.offset = (u64)-1;
 	root_location.type = BTRFS_ROOT_ITEM_KEY;
@@ -2635,12 +2645,6 @@
 				found_bytenr = path->nodes[level]->start;
 		}
 
-		for (i = level; i < BTRFS_MAX_LEVEL; i++) {
-			if (!path->nodes[i])
-				break;
-			free_extent_buffer(path->nodes[i]);
-			path->nodes[i] = NULL;
-		}
 		btrfs_release_path(cur_root, path);
 
 		if (found_bytenr == bytenr) {
@@ -2689,6 +2693,8 @@
 	int ret;
 	int level;
 
+	WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
+
 	ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
 			     struct btrfs_extent_ref);
 	ref_root = btrfs_ref_root(path->nodes[0], ref);
@@ -2707,6 +2713,7 @@
 	found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
 						&root_location);
 	BUG_ON(!found_root);
+	mutex_unlock(&extent_root->fs_info->alloc_mutex);
 
 	if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
 		found_key.objectid = ref_objectid;
@@ -2748,9 +2755,9 @@
 		/* this can happen if the reference is not against
 		 * the latest version of the tree root
 		 */
-		if (is_bad_inode(inode)) {
+		if (is_bad_inode(inode))
 			goto out;
-		}
+
 		*last_file_objectid = inode->i_ino;
 		*last_file_root = found_root->root_key.objectid;
 		*last_file_offset = ref_offset;
@@ -2760,7 +2767,7 @@
 	} else {
 		struct btrfs_trans_handle *trans;
 		struct extent_buffer *eb;
-		int i;
+		int needs_lock = 0;
 
 		eb = read_tree_block(found_root, extent_key->objectid,
 				     extent_key->offset, 0);
@@ -2782,26 +2789,40 @@
 		if (ret)
 			goto out;
 
+		/*
+		 * right here almost anything could happen to our key,
+		 * but that's ok.  The cow below will either relocate it
+		 * or someone else will have relocated it.  Either way,
+		 * it is in a different spot than it was before and
+		 * we're happy.
+		 */
+
 		trans = btrfs_start_transaction(found_root, 1);
 
+		if (found_root == extent_root->fs_info->extent_root ||
+		    found_root == extent_root->fs_info->chunk_root ||
+		    found_root == extent_root->fs_info->dev_root) {
+			needs_lock = 1;
+			mutex_lock(&extent_root->fs_info->alloc_mutex);
+		}
+
 		path->lowest_level = level;
 		path->reada = 2;
 		ret = btrfs_search_slot(trans, found_root, &found_key, path,
 					0, 1);
 		path->lowest_level = 0;
-		for (i = level; i < BTRFS_MAX_LEVEL; i++) {
-			if (!path->nodes[i])
-				break;
-			free_extent_buffer(path->nodes[i]);
-			path->nodes[i] = NULL;
-		}
 		btrfs_release_path(found_root, path);
+
 		if (found_root == found_root->fs_info->extent_root)
 			btrfs_extent_post_op(trans, found_root);
-		btrfs_end_transaction(trans, found_root);
-	}
+		if (needs_lock)
+			mutex_unlock(&extent_root->fs_info->alloc_mutex);
 
+		btrfs_end_transaction(trans, found_root);
+
+	}
 out:
+	mutex_lock(&extent_root->fs_info->alloc_mutex);
 	return 0;
 }
 
@@ -2943,7 +2964,10 @@
 
 	if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
 
+		mutex_unlock(&root->fs_info->alloc_mutex);
 		trans = btrfs_start_transaction(root, 1);
+		mutex_lock(&root->fs_info->alloc_mutex);
+
 		new_alloc_flags = update_block_group_flags(root,
 						   shrink_block_group->flags);
 		if (new_alloc_flags != shrink_block_group->flags) {
@@ -2954,7 +2978,10 @@
 		}
 		do_chunk_alloc(trans, root->fs_info->extent_root,
 			       calc + 2 * 1024 * 1024, new_alloc_flags, force);
+
+		mutex_unlock(&root->fs_info->alloc_mutex);
 		btrfs_end_transaction(trans, root);
+		mutex_lock(&root->fs_info->alloc_mutex);
 	}
 	return 0;
 }
@@ -3031,9 +3058,9 @@
 		if (ret < 0)
 			goto out;
 
+next:
 		leaf = path->nodes[0];
 		nritems = btrfs_header_nritems(leaf);
-next:
 		if (path->slots[0] >= nritems) {
 			ret = btrfs_next_leaf(root, path);
 			if (ret < 0)
@@ -3083,6 +3110,7 @@
 		printk("btrfs relocate found %llu last extent was %llu\n",
 		       (unsigned long long)total_found,
 		       (unsigned long long)found_key.objectid);
+		mutex_unlock(&root->fs_info->alloc_mutex);
 		trans = btrfs_start_transaction(tree_root, 1);
 		btrfs_commit_transaction(trans, tree_root);
 
@@ -3090,6 +3118,7 @@
 
 		trans = btrfs_start_transaction(tree_root, 1);
 		btrfs_commit_transaction(trans, tree_root);
+		mutex_lock(&root->fs_info->alloc_mutex);
 		goto again;
 	}
 
@@ -3097,7 +3126,10 @@
 	 * we've freed all the extents, now remove the block
 	 * group item from the tree
 	 */
+	mutex_unlock(&root->fs_info->alloc_mutex);
+
 	trans = btrfs_start_transaction(root, 1);
+	mutex_lock(&root->fs_info->alloc_mutex);
 	memcpy(&key, &shrink_block_group->key, sizeof(key));
 
 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
@@ -3119,8 +3151,12 @@
 	kfree(shrink_block_group);
 
 	btrfs_del_item(trans, root, path);
+	btrfs_release_path(root, path);
+	mutex_unlock(&root->fs_info->alloc_mutex);
 	btrfs_commit_transaction(trans, root);
 
+	mutex_lock(&root->fs_info->alloc_mutex);
+
 	/* the code to unpin extents might set a few bits in the free
 	 * space cache for this range again
 	 */
@@ -3263,6 +3299,7 @@
 	struct btrfs_block_group_cache *cache;
 	struct extent_io_tree *block_group_cache;
 
+	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
 	extent_root = root->fs_info->extent_root;
 	block_group_cache = &root->fs_info->block_group_cache;
 
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 026039a..83f17a5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -307,8 +307,7 @@
 		goto out;
 	}
 
-	mutex_lock(&root->fs_info->alloc_mutex);
-	mutex_lock(&root->fs_info->chunk_mutex);
+	mutex_lock(&root->fs_info->volume_mutex);
 	sizestr = vol_args->name;
 	devstr = strchr(sizestr, ':');
 	if (devstr) {
@@ -378,8 +377,7 @@
 	}
 
 out_unlock:
-	mutex_lock(&root->fs_info->alloc_mutex);
-	mutex_lock(&root->fs_info->chunk_mutex);
+	mutex_unlock(&root->fs_info->volume_mutex);
 out:
 	kfree(vol_args);
 	return ret;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 4e7cee2..5e6ee7a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -56,6 +56,18 @@
 	mutex_unlock(&uuid_mutex);
 }
 
+static void lock_chunks(struct btrfs_root *root)
+{	/* take alloc_mutex, then chunk_mutex, of root's fs_info */
+	mutex_lock(&root->fs_info->alloc_mutex);	/* outer lock */
+	mutex_lock(&root->fs_info->chunk_mutex);	/* inner lock */
+}
+
+static void unlock_chunks(struct btrfs_root *root)
+{	/* undo lock_chunks(): release in reverse order of acquisition */
+	mutex_unlock(&root->fs_info->chunk_mutex);	/* inner lock */
+	mutex_unlock(&root->fs_info->alloc_mutex);	/* outer lock */
+}
+
 int btrfs_cleanup_fs_uuids(void)
 {
 	struct btrfs_fs_devices *fs_devices;
@@ -822,6 +834,7 @@
 	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
 	key.type = BTRFS_DEV_ITEM_KEY;
 	key.offset = device->devid;
+	lock_chunks(root);
 
 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 	if (ret < 0)
@@ -856,6 +869,7 @@
 				    total_bytes - 1);
 out:
 	btrfs_free_path(path);
+	unlock_chunks(root);
 	btrfs_commit_transaction(trans, root);
 	return ret;
 }
@@ -870,9 +884,8 @@
 	u64 devid;
 	int ret = 0;
 
-	mutex_lock(&root->fs_info->alloc_mutex);
-	mutex_lock(&root->fs_info->chunk_mutex);
 	mutex_lock(&uuid_mutex);
+	mutex_lock(&root->fs_info->volume_mutex);
 
 	all_avail = root->fs_info->avail_data_alloc_bits |
 		root->fs_info->avail_system_alloc_bits |
@@ -988,9 +1001,8 @@
 	if (bdev)
 		close_bdev_excl(bdev);
 out:
+	mutex_unlock(&root->fs_info->volume_mutex);
 	mutex_unlock(&uuid_mutex);
-	mutex_unlock(&root->fs_info->chunk_mutex);
-	mutex_unlock(&root->fs_info->alloc_mutex);
 	return ret;
 }
 
@@ -1010,10 +1022,10 @@
 		return -EIO;
 	}
 
-	mutex_lock(&root->fs_info->alloc_mutex);
-	mutex_lock(&root->fs_info->chunk_mutex);
+	mutex_lock(&root->fs_info->volume_mutex);
 
 	trans = btrfs_start_transaction(root, 1);
+	lock_chunks(root);
 	devices = &root->fs_info->fs_devices->devices;
 	list_for_each(cur, devices) {
 		device = list_entry(cur, struct btrfs_device, dev_list);
@@ -1065,9 +1077,9 @@
 	root->fs_info->fs_devices->num_devices++;
 	root->fs_info->fs_devices->open_devices++;
 out:
+	unlock_chunks(root);
 	btrfs_end_transaction(trans, root);
-	mutex_unlock(&root->fs_info->chunk_mutex);
-	mutex_unlock(&root->fs_info->alloc_mutex);
+	mutex_unlock(&root->fs_info->volume_mutex);
 
 	return ret;
 
@@ -1122,7 +1134,7 @@
 	return ret;
 }
 
-int btrfs_grow_device(struct btrfs_trans_handle *trans,
+static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
 		      struct btrfs_device *device, u64 new_size)
 {
 	struct btrfs_super_block *super_copy =
@@ -1134,6 +1146,16 @@
 	return btrfs_update_device(trans, device);
 }
 
+int btrfs_grow_device(struct btrfs_trans_handle *trans,
+		      struct btrfs_device *device, u64 new_size)
+{	/* locked wrapper: runs __btrfs_grow_device() under lock_chunks() */
+	int ret;
+	lock_chunks(device->dev_root);
+	ret = __btrfs_grow_device(trans, device, new_size);
+	unlock_chunks(device->dev_root);
+	return ret;	/* result of __btrfs_grow_device(), passed through */
+}
+
 static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *root,
 			    u64 chunk_tree, u64 chunk_objectid,
@@ -1234,6 +1256,8 @@
 	trans = btrfs_start_transaction(root, 1);
 	BUG_ON(!trans);
 
+	lock_chunks(root);
+
 	/*
 	 * step two, delete the device extents and the
 	 * chunk tree entries
@@ -1278,6 +1302,7 @@
 	/* once for us */
 	free_extent_map(em);
 
+	unlock_chunks(root);
 	btrfs_end_transaction(trans, root);
 	return 0;
 }
@@ -1308,8 +1333,7 @@
 	struct btrfs_key found_key;
 
 
-	BUG(); /* FIXME, needs locking */
-
+	mutex_lock(&dev_root->fs_info->volume_mutex);
 	dev_root = dev_root->fs_info->dev_root;
 
 	/* step one make some room on all the devices */
@@ -1355,13 +1379,14 @@
 
 		ret = btrfs_previous_item(chunk_root, path, 0,
 					  BTRFS_CHUNK_ITEM_KEY);
-		if (ret) {
+		if (ret)
 			break;
-		}
+
 		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
 				      path->slots[0]);
 		if (found_key.objectid != key.objectid)
 			break;
+
 		chunk = btrfs_item_ptr(path->nodes[0],
 				       path->slots[0],
 				       struct btrfs_chunk);
@@ -1370,16 +1395,17 @@
 		if (key.offset == 0)
 			break;
 
+		btrfs_release_path(chunk_root, path);
 		ret = btrfs_relocate_chunk(chunk_root,
 					   chunk_root->root_key.objectid,
 					   found_key.objectid,
 					   found_key.offset);
 		BUG_ON(ret);
-		btrfs_release_path(chunk_root, path);
 	}
 	ret = 0;
 error:
 	btrfs_free_path(path);
+	mutex_unlock(&dev_root->fs_info->volume_mutex);
 	return ret;
 }
 
@@ -1419,14 +1445,18 @@
 
 	path->reada = 2;
 
+	lock_chunks(root);
+
 	device->total_bytes = new_size;
 	ret = btrfs_update_device(trans, device);
 	if (ret) {
+		unlock_chunks(root);
 		btrfs_end_transaction(trans, root);
 		goto done;
 	}
 	WARN_ON(diff > old_total);
 	btrfs_set_super_total_bytes(super_copy, old_total - diff);
+	unlock_chunks(root);
 	btrfs_end_transaction(trans, root);
 
 	key.objectid = device->devid;