Btrfs: shift all end_io work to thread pools
bio_end_io processing for reads with checksumming turned off and for
btree writes was happening without using the async thread pools. This
meant the extent_io.c code had to use spin_lock_irq and friends on the
rb tree locks for extent state.
There were some irq safe vs unsafe lock inversions between the delalloc
lock and the extent state locks. This patch gets rid of them by moving
all end_io code into the thread pools.
To avoid contention and deadlocks between the data end_io processing and
the metadata end_io processing, yet another thread pool is added to
finish off metadata writes.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0577e77..068bad4 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1282,8 +1282,8 @@
}
/*
- * extent_io.c submission hook. This does the right thing for csum calculation on write,
- * or reading the csums from the tree before a read
+ * extent_io.c submission hook. This does the right thing for csum calculation
+ * on write, or reading the csums from the tree before a read
*/
static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
int mirror_num, unsigned long bio_flags)
@@ -1292,11 +1292,11 @@
int ret = 0;
int skip_sum;
+ skip_sum = btrfs_test_flag(inode, NODATASUM);
+
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
BUG_ON(ret);
- skip_sum = btrfs_test_flag(inode, NODATASUM);
-
if (!(rw & (1 << BIO_RW))) {
if (bio_flags & EXTENT_BIO_COMPRESSED) {
return btrfs_submit_compressed_read(inode, bio,
@@ -1648,13 +1648,13 @@
failrec->logical, failrec->len);
failrec->last_mirror++;
if (!state) {
- spin_lock_irq(&BTRFS_I(inode)->io_tree.lock);
+ spin_lock(&BTRFS_I(inode)->io_tree.lock);
state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
failrec->start,
EXTENT_LOCKED);
if (state && state->start != failrec->start)
state = NULL;
- spin_unlock_irq(&BTRFS_I(inode)->io_tree.lock);
+ spin_unlock(&BTRFS_I(inode)->io_tree.lock);
}
if (!state || failrec->last_mirror > num_copies) {
set_state_private(failure_tree, failrec->start, 0);