btrfs: qgroup: Fix qgroup reserved space underflow by only freeing reserved ranges
[BUG]
For the following case, btrfs can underflow qgroup reserved space
at an error path:
(Page size 4K, function name without "btrfs_" prefix)
Task A | Task B
----------------------------------------------------------------------
Buffered_write [0, 2K) |
|- check_data_free_space() |
| |- qgroup_reserve_data() |
| Range aligned to page |
| range [0, 4K) <<< |
| 4K bytes reserved <<< |
|- copy pages to page cache |
| Buffered_write [2K, 4K)
| |- check_data_free_space()
| | |- qgroup_reserved_data()
| | Range alinged to page
| | range [0, 4K)
| | Already reserved by A <<<
| | 0 bytes reserved <<<
| |- delalloc_reserve_metadata()
| | And it *FAILED* (Maybe EQUOTA)
| |- free_reserved_data_space()
|- qgroup_free_data()
Range aligned to page range
[0, 4K)
Freeing 4K
(Special thanks to Chandan for the detailed report and analyse)
[CAUSE]
Above Task B is freeing reserved data range [0, 4K) which is actually
reserved by Task A.
And at writeback time, page dirty by Task A will go through writeback
routine, which will free 4K reserved data space at file extent insert
time, causing the qgroup underflow.
[FIX]
For btrfs_qgroup_free_data(), add @reserved parameter to only free
data ranges reserved by previous btrfs_qgroup_reserve_data().
So in above case, Task B will try to free 0 byte, so no underflow.
Reported-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Tested-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Signed-off-by: David Sterba <dsterba@suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d9cf6df..5d3c6ac 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -345,7 +345,7 @@
* And at reserve time, it's always aligned to page size, so
* just free one page here.
*/
- btrfs_qgroup_free_data(inode, 0, PAGE_SIZE);
+ btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE);
btrfs_free_path(path);
btrfs_end_transaction(trans);
return ret;
@@ -2935,7 +2935,7 @@
* space for NOCOW range.
* As NOCOW won't cause a new delayed ref, just free the space
*/
- btrfs_qgroup_free_data(inode, ordered_extent->file_offset,
+ btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset,
ordered_extent->len);
btrfs_ordered_update_i_size(inode, 0, ordered_extent);
if (nolock)
@@ -4794,7 +4794,7 @@
again:
page = find_or_create_page(mapping, index, mask);
if (!page) {
- btrfs_delalloc_release_space(inode,
+ btrfs_delalloc_release_space(inode, data_reserved,
round_down(from, blocksize),
blocksize);
ret = -ENOMEM;
@@ -4866,7 +4866,7 @@
out_unlock:
if (ret)
- btrfs_delalloc_release_space(inode, block_start,
+ btrfs_delalloc_release_space(inode, data_reserved, block_start,
blocksize);
unlock_page(page);
put_page(page);
@@ -5266,7 +5266,7 @@
* Note, end is the bytenr of last byte, so we need + 1 here.
*/
if (state->state & EXTENT_DELALLOC)
- btrfs_qgroup_free_data(inode, start, end - start + 1);
+ btrfs_qgroup_free_data(inode, NULL, start, end - start + 1);
clear_extent_bit(io_tree, start, end,
EXTENT_LOCKED | EXTENT_DIRTY |
@@ -8792,8 +8792,8 @@
current->journal_info = NULL;
if (ret < 0 && ret != -EIOCBQUEUED) {
if (dio_data.reserve)
- btrfs_delalloc_release_space(inode, offset,
- dio_data.reserve);
+ btrfs_delalloc_release_space(inode, data_reserved,
+ offset, dio_data.reserve);
/*
* On error we might have left some ordered extents
* without submitting corresponding bios for them, so
@@ -8808,8 +8808,8 @@
dio_data.unsubmitted_oe_range_start,
false);
} else if (ret >= 0 && (size_t)ret < count)
- btrfs_delalloc_release_space(inode, offset,
- count - (size_t)ret);
+ btrfs_delalloc_release_space(inode, data_reserved,
+ offset, count - (size_t)ret);
}
out:
if (wakeup)
@@ -9008,7 +9008,7 @@
* free the entire extent.
*/
if (PageDirty(page))
- btrfs_qgroup_free_data(inode, page_start, PAGE_SIZE);
+ btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
if (!inode_evicting) {
clear_extent_bit(tree, page_start, page_end,
EXTENT_LOCKED | EXTENT_DIRTY |
@@ -9130,8 +9130,8 @@
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
- btrfs_delalloc_release_space(inode, page_start,
- PAGE_SIZE - reserved_space);
+ btrfs_delalloc_release_space(inode, data_reserved,
+ page_start, PAGE_SIZE - reserved_space);
}
}
@@ -9187,7 +9187,8 @@
}
unlock_page(page);
out:
- btrfs_delalloc_release_space(inode, page_start, reserved_space);
+ btrfs_delalloc_release_space(inode, data_reserved, page_start,
+ reserved_space);
out_noreserve:
sb_end_pagefault(inode->i_sb);
extent_changeset_free(data_reserved);
@@ -10557,7 +10558,7 @@
btrfs_end_transaction(trans);
}
if (cur_offset < end)
- btrfs_free_reserved_data_space(inode, cur_offset,
+ btrfs_free_reserved_data_space(inode, NULL, cur_offset,
end - cur_offset + 1);
return ret;
}