Btrfs: account for large extents with enospc
On our gluster boxes we stream large tar balls of backups onto our fses. With
160gb of ram this means we get really large contiguous ranges of dirty data, but
the way our ENOSPC stuff works is that as long as it's contiguous we only hold
metadata reservation for one extent. The problem is we limit our extents to
128mb, so we'll end up with at least 800 extents so our enospc accounting is
quite a bit lower than what we need. To keep track of this make sure we
increase outstanding_extents for every multiple of the max extent size so we can
be sure to have enough reserved metadata space. Thanks,
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 50de1fa..0f67370 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4963,19 +4963,25 @@
/**
* drop_outstanding_extent - drop an outstanding extent
* @inode: the inode we're dropping the extent for
+ * @num_bytes: the number of bytes we're relaseing.
*
* This is called when we are freeing up an outstanding extent, either called
* after an error or after an extent is written. This will return the number of
* reserved extents that need to be freed. This must be called with
* BTRFS_I(inode)->lock held.
*/
-static unsigned drop_outstanding_extent(struct inode *inode)
+static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes)
{
unsigned drop_inode_space = 0;
unsigned dropped_extents = 0;
+ unsigned num_extents = 0;
- BUG_ON(!BTRFS_I(inode)->outstanding_extents);
- BTRFS_I(inode)->outstanding_extents--;
+ num_extents = (unsigned)div64_u64(num_bytes +
+ BTRFS_MAX_EXTENT_SIZE - 1,
+ BTRFS_MAX_EXTENT_SIZE);
+ ASSERT(num_extents);
+ ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents);
+ BTRFS_I(inode)->outstanding_extents -= num_extents;
if (BTRFS_I(inode)->outstanding_extents == 0 &&
test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
@@ -5146,7 +5152,7 @@
out_fail:
spin_lock(&BTRFS_I(inode)->lock);
- dropped = drop_outstanding_extent(inode);
+ dropped = drop_outstanding_extent(inode, num_bytes);
/*
* If the inodes csum_bytes is the same as the original
* csum_bytes then we know we haven't raced with any free()ers
@@ -5225,7 +5231,7 @@
num_bytes = ALIGN(num_bytes, root->sectorsize);
spin_lock(&BTRFS_I(inode)->lock);
- dropped = drop_outstanding_extent(inode);
+ dropped = drop_outstanding_extent(inode, num_bytes);
if (num_bytes)
to_free = calc_csum_metadata_size(inode, num_bytes, 0);