Btrfs: alloc_mutex latency reduction

This releases the alloc_mutex in a few places that hold it across
long-running operations, and adds a per-block-group spinlock so the
usage counters can be examined without it.  btrfs_find_block_group is
changed so that it no longer needs the mutex at all.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
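
The core latency trick, applied below in btrfs_finish_extent_commit(),
finish_current_insert() and del_pending_extents(), is to drop the mutex
at a safe point in a long loop whenever the scheduler wants the CPU,
instead of holding it across the whole walk.  A minimal userspace sketch
of that pattern, with pthreads standing in for the kernel mutex and
sched_yield() for cond_resched() (all names illustrative, not kernel
code):

/*
 * Illustrative userspace analogue of the pattern: a long walk under a
 * heavily contended lock periodically drops it, yields, and re-acquires
 * it so other waiters see bounded latency.
 */
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static pthread_mutex_t alloc_mutex = PTHREAD_MUTEX_INITIALIZER;

static void process_one_extent(long i)
{
	/* stands in for the per-extent work done under the lock */
}

static void long_walk(long nr_extents)
{
	pthread_mutex_lock(&alloc_mutex);
	for (long i = 0; i < nr_extents; i++) {
		process_one_extent(i);	/* state consistent at loop bottom */

		/* userspace stand-in for the kernel's need_resched() test */
		if ((i & 1023) == 1023) {
			pthread_mutex_unlock(&alloc_mutex);
			sched_yield();	/* cond_resched() analogue */
			pthread_mutex_lock(&alloc_mutex);
			/* the walk must tolerate state changing here */
		}
	}
	pthread_mutex_unlock(&alloc_mutex);
}

int main(void)
{
	long_walk(1L << 20);
	printf("done\n");
	return 0;
}

In the kernel the check is cheap (need_resched() reads a thread flag),
so it can sit in a hot loop; the important part is that the unlock only
happens where the structures the mutex protects are consistent.
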
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index eeb5afa..90504ba 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -483,6 +483,7 @@
 	struct btrfs_key key;
 	struct btrfs_block_group_item item;
 	struct btrfs_space_info *space_info;
+	spinlock_t lock;
 	u64 pinned;
 	u64 flags;
 	int cached;
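
The new spinlock is what makes the per-group counters (the used count
in ->item, ->pinned, and the space_info byte totals) safe to read and
update without the global alloc_mutex, which in turn lets the block
group search path below drop the mutex entirely.  A rough userspace
sketch of the idea, with invented field and function names:

/*
 * Rough sketch of the new per-block-group locking; names are invented
 * for illustration and the factor math is simplified.
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

struct block_group {
	pthread_spinlock_t lock;	/* analogue of the new spinlock_t */
	uint64_t used;			/* btrfs_block_group_used(&item) */
	uint64_t pinned;
	uint64_t size;			/* key.offset */
};

/* Take a consistent snapshot of both counters without a global mutex. */
static int group_mostly_empty(struct block_group *bg, uint64_t factor_pct)
{
	uint64_t used, pinned;

	pthread_spin_lock(&bg->lock);
	used = bg->used;
	pinned = bg->pinned;
	pthread_spin_unlock(&bg->lock);

	/* mirrors: used + pinned < div_factor(key.offset, factor) */
	return used + pinned < bg->size * factor_pct / 100;
}

int main(void)
{
	struct block_group bg = { .used = 10, .pinned = 2, .size = 100 };

	pthread_spin_init(&bg.lock, PTHREAD_PROCESS_PRIVATE);
	printf("%d\n", group_mostly_empty(&bg, 80));
	pthread_spin_destroy(&bg.lock);
	return 0;
}
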
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 72fa282..febc629 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -319,7 +319,7 @@
 		cache = btrfs_lookup_first_block_group(root->fs_info, last);
 	}
 	cache_miss = 0;
-	cache = __btrfs_find_block_group(root, cache, last, data, 0);
+	cache = btrfs_find_block_group(root, cache, last, data, 0);
 	if (!cache)
 		goto no_cache;
 	*cache_ret = cache;
@@ -379,19 +379,25 @@
 		struct btrfs_block_group_cache *shint;
 		shint = btrfs_lookup_first_block_group(info, search_start);
 		if (shint && block_group_bits(shint, data) && !shint->ro) {
+			spin_lock(&shint->lock);
 			used = btrfs_block_group_used(&shint->item);
 			if (used + shint->pinned <
 			    div_factor(shint->key.offset, factor)) {
+				spin_unlock(&shint->lock);
 				return shint;
 			}
+			spin_unlock(&shint->lock);
 		}
 	}
 	if (hint && !hint->ro && block_group_bits(hint, data)) {
+		spin_lock(&hint->lock);
 		used = btrfs_block_group_used(&hint->item);
 		if (used + hint->pinned <
 		    div_factor(hint->key.offset, factor)) {
+			spin_unlock(&hint->lock);
 			return hint;
 		}
+		spin_unlock(&hint->lock);
 		last = hint->key.objectid + hint->key.offset;
 	} else {
 		if (hint)
@@ -413,6 +419,7 @@
 		}
 
 		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
+		spin_lock(&cache->lock);
 		last = cache->key.objectid + cache->key.offset;
 		used = btrfs_block_group_used(&cache->item);
 
@@ -420,9 +427,11 @@
 			free_check = div_factor(cache->key.offset, factor);
 			if (used + cache->pinned < free_check) {
 				found_group = cache;
+				spin_unlock(&cache->lock);
 				goto found;
 			}
 		}
+		spin_unlock(&cache->lock);
 		cond_resched();
 	}
 	if (!wrapped) {
@@ -447,9 +456,7 @@
 {
 
 	struct btrfs_block_group_cache *ret;
-	mutex_lock(&root->fs_info->alloc_mutex);
 	ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
-	mutex_unlock(&root->fs_info->alloc_mutex);
 	return ret;
 }
 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
@@ -1262,21 +1269,25 @@
 		set_extent_bits(&info->block_group_cache, start, end,
 				BLOCK_GROUP_DIRTY, GFP_NOFS);
 
+		spin_lock(&cache->lock);
 		old_val = btrfs_block_group_used(&cache->item);
 		num_bytes = min(total, cache->key.offset - byte_in_group);
 		if (alloc) {
 			old_val += num_bytes;
 			cache->space_info->bytes_used += num_bytes;
+			btrfs_set_block_group_used(&cache->item, old_val);
+			spin_unlock(&cache->lock);
 		} else {
 			old_val -= num_bytes;
 			cache->space_info->bytes_used -= num_bytes;
+			btrfs_set_block_group_used(&cache->item, old_val);
+			spin_unlock(&cache->lock);
 			if (mark_free) {
 				set_extent_dirty(&info->free_space_cache,
 						 bytenr, bytenr + num_bytes - 1,
 						 GFP_NOFS);
 			}
 		}
-		btrfs_set_block_group_used(&cache->item, old_val);
 		total -= num_bytes;
 		bytenr += num_bytes;
 	}
@@ -1325,14 +1336,18 @@
 		}
 		if (pin) {
 			if (cache) {
+				spin_lock(&cache->lock);
 				cache->pinned += len;
 				cache->space_info->bytes_pinned += len;
+				spin_unlock(&cache->lock);
 			}
 			fs_info->total_pinned += len;
 		} else {
 			if (cache) {
+				spin_lock(&cache->lock);
 				cache->pinned -= len;
 				cache->space_info->bytes_pinned -= len;
+				spin_unlock(&cache->lock);
 			}
 			fs_info->total_pinned -= len;
 		}
@@ -1380,6 +1395,11 @@
 		update_pinned_extents(root, start, end + 1 - start, 0);
 		clear_extent_dirty(unpin, start, end, GFP_NOFS);
 		set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
+		if (need_resched()) {
+			mutex_unlock(&root->fs_info->alloc_mutex);
+			cond_resched();
+			mutex_lock(&root->fs_info->alloc_mutex);
+		}
 	}
 	mutex_unlock(&root->fs_info->alloc_mutex);
 	return 0;
@@ -1417,8 +1437,16 @@
 					&extent_item, sizeof(extent_item));
 		clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
 				  GFP_NOFS);
-		eb = read_tree_block(extent_root, ins.objectid, ins.offset,
-				     trans->transid);
+
+		eb = btrfs_find_tree_block(extent_root, ins.objectid,
+					   ins.offset);
+
+		if (!btrfs_buffer_uptodate(eb, trans->transid)) {
+			mutex_unlock(&extent_root->fs_info->alloc_mutex);
+			btrfs_read_buffer(eb, trans->transid);
+			mutex_lock(&extent_root->fs_info->alloc_mutex);
+		}
+
 		btrfs_tree_lock(eb);
 		level = btrfs_header_level(eb);
 		if (level == 0) {
@@ -1437,6 +1465,11 @@
 					  0, level,
 					  btrfs_disk_key_objectid(&first));
 		BUG_ON(err);
+		if (need_resched()) {
+			mutex_unlock(&extent_root->fs_info->alloc_mutex);
+			cond_resched();
+			mutex_lock(&extent_root->fs_info->alloc_mutex);
+		}
 	}
 	btrfs_free_path(path);
 	return 0;
@@ -1640,15 +1673,28 @@
 					    EXTENT_LOCKED);
 		if (ret)
 			break;
-		update_pinned_extents(extent_root, start, end + 1 - start, 1);
 		clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
 				  GFP_NOFS);
-		ret = __free_extent(trans, extent_root,
-				     start, end + 1 - start,
-				     extent_root->root_key.objectid,
-				     0, 0, 0, 0, 0);
+		if (!test_range_bit(&extent_root->fs_info->extent_ins,
+				    start, end, EXTENT_LOCKED, 0)) {
+			update_pinned_extents(extent_root, start,
+					      end + 1 - start, 1);
+			ret = __free_extent(trans, extent_root,
+					     start, end + 1 - start,
+					     extent_root->root_key.objectid,
+					     0, 0, 0, 0, 0);
+		} else {
+			clear_extent_bits(&extent_root->fs_info->extent_ins,
+					  start, end, EXTENT_LOCKED, GFP_NOFS);
+		}
 		if (ret)
 			err = ret;
+
+		if (need_resched()) {
+			mutex_unlock(&extent_root->fs_info->alloc_mutex);
+			cond_resched();
+			mutex_lock(&extent_root->fs_info->alloc_mutex);
+		}
 	}
 	return err;
 }
@@ -1768,12 +1814,12 @@
 		block_group = btrfs_lookup_first_block_group(info, hint_byte);
 		if (!block_group)
 			hint_byte = search_start;
-		block_group = __btrfs_find_block_group(root, block_group,
+		block_group = btrfs_find_block_group(root, block_group,
 						     hint_byte, data, 1);
 		if (last_ptr && *last_ptr == 0 && block_group)
 			hint_byte = block_group->key.objectid;
 	} else {
-		block_group = __btrfs_find_block_group(root,
+		block_group = btrfs_find_block_group(root,
 						     trans->block_group,
 						     search_start, data, 1);
 	}
@@ -1895,7 +1941,7 @@
 	}
 	block_group = btrfs_lookup_first_block_group(info, search_start);
 	cond_resched();
-	block_group = __btrfs_find_block_group(root, block_group,
+	block_group = btrfs_find_block_group(root, block_group,
 					     search_start, data, 0);
 	goto check_failed;
 
@@ -3032,11 +3078,14 @@
 	u64 new_alloc_flags;
 	u64 calc;
 
+	spin_lock(&shrink_block_group->lock);
 	if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
-
+		spin_unlock(&shrink_block_group->lock);
 		mutex_unlock(&root->fs_info->alloc_mutex);
+
 		trans = btrfs_start_transaction(root, 1);
 		mutex_lock(&root->fs_info->alloc_mutex);
+		spin_lock(&shrink_block_group->lock);
 
 		new_alloc_flags = update_block_group_flags(root,
 						   shrink_block_group->flags);
@@ -3046,13 +3095,16 @@
 		} else {
 			calc = shrink_block_group->key.offset;
 		}
+		spin_unlock(&shrink_block_group->lock);
+
 		do_chunk_alloc(trans, root->fs_info->extent_root,
 			       calc + 2 * 1024 * 1024, new_alloc_flags, force);
 
 		mutex_unlock(&root->fs_info->alloc_mutex);
 		btrfs_end_transaction(trans, root);
 		mutex_lock(&root->fs_info->alloc_mutex);
-	}
+	} else
+		spin_unlock(&shrink_block_group->lock);
 	return 0;
 }
 
@@ -3199,6 +3251,7 @@
 	mutex_unlock(&root->fs_info->alloc_mutex);
 
 	trans = btrfs_start_transaction(root, 1);
+
 	mutex_lock(&root->fs_info->alloc_mutex);
 	memcpy(&key, &shrink_block_group->key, sizeof(key));
 
@@ -3316,6 +3369,7 @@
 			break;
 		}
 
+		spin_lock_init(&cache->lock);
 		read_extent_buffer(leaf, &cache->item,
 				   btrfs_item_ptr_offset(leaf, path->slots[0]),
 				   sizeof(cache->item));
@@ -3343,10 +3397,12 @@
 		/* use EXTENT_LOCKED to prevent merging */
 		set_extent_bits(block_group_cache, found_key.objectid,
 				found_key.objectid + found_key.offset - 1,
-				bit | EXTENT_LOCKED, GFP_NOFS);
+				EXTENT_LOCKED, GFP_NOFS);
 		set_state_private(block_group_cache, found_key.objectid,
 				  (unsigned long)cache);
-
+		set_extent_bits(block_group_cache, found_key.objectid,
+				found_key.objectid + found_key.offset - 1,
+				bit | EXTENT_LOCKED, GFP_NOFS);
 		if (key.objectid >=
 		    btrfs_super_total_bytes(&info->super_copy))
 			break;
@@ -3377,6 +3433,7 @@
 	BUG_ON(!cache);
 	cache->key.objectid = chunk_offset;
 	cache->key.offset = size;
+	spin_lock_init(&cache->lock);
 	btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
 
 	btrfs_set_block_group_used(&cache->item, bytes_used);
@@ -3391,10 +3448,13 @@
 	bit = block_group_state_bits(type);
 	set_extent_bits(block_group_cache, chunk_offset,
 			chunk_offset + size - 1,
-			bit | EXTENT_LOCKED, GFP_NOFS);
-
+			EXTENT_LOCKED, GFP_NOFS);
 	set_state_private(block_group_cache, chunk_offset,
 			  (unsigned long)cache);
+	set_extent_bits(block_group_cache, chunk_offset,
+			chunk_offset + size - 1,
+			bit | EXTENT_LOCKED, GFP_NOFS);
+
 	ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
 				sizeof(cache->item));
 	BUG_ON(ret);
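
One subtlety in the last two hunks: now that lookups can run without
alloc_mutex, the block-group bits those lookups test must not become
visible before the private pointer is attached.  The patch therefore
sets only the placeholder EXTENT_LOCKED bit, attaches the pointer, and
only then ORs in the real bits.  In the kernel the extent-state tree's
own lock serializes this; the publication ordering can be modeled in
isolation with C11 atomics (names invented):

/*
 * Standalone model of the publish ordering, using C11 atomics in place
 * of the extent-state tree's internal locking.  Invented names.
 */
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

#define BLOCK_GROUP_BITS 0x1u		/* bits lockless lookups test */

struct cache { int dummy; };

static struct cache *state_private;	/* the attached pointer */
static _Atomic unsigned state_bits;

static void publish(struct cache *c)
{
	state_private = c;		/* step 1: attach the pointer */
	/* step 2: release store orders the pointer before the bits */
	atomic_store_explicit(&state_bits, BLOCK_GROUP_BITS,
			      memory_order_release);
}

static struct cache *lookup(void)
{
	/* acquire pairs with the release store in publish() */
	if (atomic_load_explicit(&state_bits, memory_order_acquire) &
	    BLOCK_GROUP_BITS)
		return state_private;	/* pointer is guaranteed set */
	return NULL;
}

int main(void)
{
	static struct cache c;

	publish(&c);
	printf("%p\n", (void *)lookup());
	return 0;
}
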