Merge branch 'for-chris' of git://git.kernel.org/pub/scm/linux/kernel/git/josef/btrfs-next
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 18af6f4..1431a69 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -1700,7 +1700,7 @@
unsigned int j;
DECLARE_COMPLETION_ONSTACK(complete);
- bio = bio_alloc(GFP_NOFS, num_pages - i);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);
if (!bio) {
printk(KERN_INFO
"btrfsic: bio_alloc() for %u pages failed!\n",
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4bdb052..e7b3cb5 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3130,7 +3130,7 @@
* caller
*/
device->flush_bio = NULL;
- bio = bio_alloc(GFP_NOFS, 0);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
if (!bio)
return -ENOMEM;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ca4355d..b306b3a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -23,6 +23,7 @@
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
+static struct bio_set *btrfs_bioset;
#ifdef CONFIG_BTRFS_DEBUG
static LIST_HEAD(buffers);
@@ -125,10 +126,20 @@
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!extent_buffer_cache)
goto free_state_cache;
+
+ btrfs_bioset = bioset_create(BIO_POOL_SIZE,
+ offsetof(struct btrfs_io_bio, bio));
+ if (!btrfs_bioset)
+ goto free_buffer_cache;
return 0;
+free_buffer_cache:
+ kmem_cache_destroy(extent_buffer_cache);
+ extent_buffer_cache = NULL;
+
free_state_cache:
kmem_cache_destroy(extent_state_cache);
+ extent_state_cache = NULL;
return -ENOMEM;
}
@@ -145,6 +156,8 @@
kmem_cache_destroy(extent_state_cache);
if (extent_buffer_cache)
kmem_cache_destroy(extent_buffer_cache);
+ if (btrfs_bioset)
+ bioset_free(btrfs_bioset);
}
void extent_io_tree_init(struct extent_io_tree *tree,
@@ -2024,7 +2037,7 @@
if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
return 0;
- bio = bio_alloc(GFP_NOFS, 1);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio)
return -EIO;
bio->bi_private = &compl;
@@ -2314,7 +2327,7 @@
return -EIO;
}
- bio = bio_alloc(GFP_NOFS, 1);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio) {
free_io_failure(inode, failrec, 0);
return -EIO;
@@ -2436,10 +2449,11 @@
struct page *page = bvec->bv_page;
struct extent_state *cached = NULL;
struct extent_state *state;
+ struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
- "mirror=%ld\n", (u64)bio->bi_sector, err,
- (long int)bio->bi_bdev);
+ "mirror=%lu\n", (u64)bio->bi_sector, err,
+ io_bio->mirror_num);
tree = &BTRFS_I(page->mapping->host)->io_tree;
/* We always issue full-page reads, but if some block
@@ -2470,7 +2484,7 @@
}
spin_unlock(&tree->lock);
- mirror = (int)(unsigned long)bio->bi_bdev;
+ mirror = io_bio->mirror_num;
if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
ret = tree->ops->readpage_end_io_hook(page, start, end,
state, mirror);
@@ -2525,17 +2539,23 @@
bio_put(bio);
}
+/*
+ * this allocates from the btrfs_bioset. We're returning a bio right now
+ * but you can call btrfs_io_bio for the appropriate container_of magic
+ */
struct bio *
btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
gfp_t gfp_flags)
{
struct bio *bio;
- bio = bio_alloc(gfp_flags, nr_vecs);
+ bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
if (bio == NULL && (current->flags & PF_MEMALLOC)) {
- while (!bio && (nr_vecs /= 2))
- bio = bio_alloc(gfp_flags, nr_vecs);
+ while (!bio && (nr_vecs /= 2)) {
+ bio = bio_alloc_bioset(gfp_flags,
+ nr_vecs, btrfs_bioset);
+ }
}
if (bio) {
@@ -2546,6 +2566,19 @@
return bio;
}
+struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
+{
+ return bio_clone_bioset(bio, gfp_mask, btrfs_bioset);
+}
+
+
+/* this also allocates from the btrfs_bioset */
+struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
+{
+ return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
+}
+
+
static int __must_check submit_one_bio(int rw, struct bio *bio,
int mirror_num, unsigned long bio_flags)
{
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index a2c03a1..41fb81e 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -336,6 +336,8 @@
struct bio *
btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
gfp_t gfp_flags);
+struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs);
+struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask);
struct btrfs_fs_info;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 19eef3e..23c596c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6931,7 +6931,11 @@
/* IO errors */
int errors;
+ /* orig_bio is our btrfs_io_bio */
struct bio *orig_bio;
+
+ /* dio_bio came from fs/direct-io.c */
+ struct bio *dio_bio;
};
static void btrfs_endio_direct_read(struct bio *bio, int err)
@@ -6941,6 +6945,7 @@
struct bio_vec *bvec = bio->bi_io_vec;
struct inode *inode = dip->inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct bio *dio_bio;
u64 start;
start = dip->logical_offset;
@@ -6980,14 +6985,15 @@
unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
dip->logical_offset + dip->bytes - 1);
- bio->bi_private = dip->private;
+ dio_bio = dip->dio_bio;
kfree(dip);
/* If we had a csum failure make sure to clear the uptodate flag */
if (err)
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
- dio_end_io(bio, err);
+ clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
+ dio_end_io(dio_bio, err);
+ bio_put(bio);
}
static void btrfs_endio_direct_write(struct bio *bio, int err)
@@ -6998,6 +7004,7 @@
struct btrfs_ordered_extent *ordered = NULL;
u64 ordered_offset = dip->logical_offset;
u64 ordered_bytes = dip->bytes;
+ struct bio *dio_bio;
int ret;
if (err)
@@ -7025,14 +7032,15 @@
goto again;
}
out_done:
- bio->bi_private = dip->private;
+ dio_bio = dip->dio_bio;
kfree(dip);
/* If we had an error make sure to clear the uptodate flag */
if (err)
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
- dio_end_io(bio, err);
+ clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
+ dio_end_io(dio_bio, err);
+ bio_put(bio);
}
static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
@@ -7068,10 +7076,10 @@
if (!atomic_dec_and_test(&dip->pending_bios))
goto out;
- if (dip->errors)
+ if (dip->errors) {
bio_io_error(dip->orig_bio);
- else {
- set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags);
+ } else {
+ set_bit(BIO_UPTODATE, &dip->dio_bio->bi_flags);
bio_endio(dip->orig_bio, 0);
}
out:
@@ -7246,25 +7254,34 @@
return 0;
}
-static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
- loff_t file_offset)
+static void btrfs_submit_direct(int rw, struct bio *dio_bio,
+ struct inode *inode, loff_t file_offset)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_dio_private *dip;
- struct bio_vec *bvec = bio->bi_io_vec;
+ struct bio_vec *bvec = dio_bio->bi_io_vec;
+ struct bio *io_bio;
int skip_sum;
int write = rw & REQ_WRITE;
int ret = 0;
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
- dip = kmalloc(sizeof(*dip), GFP_NOFS);
- if (!dip) {
+ io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
+
+ if (!io_bio) {
ret = -ENOMEM;
goto free_ordered;
}
- dip->private = bio->bi_private;
+ dip = kmalloc(sizeof(*dip), GFP_NOFS);
+ if (!dip) {
+ ret = -ENOMEM;
+ goto free_io_bio;
+ }
+
+ dip->private = dio_bio->bi_private;
+ io_bio->bi_private = dio_bio->bi_private;
dip->inode = inode;
dip->logical_offset = file_offset;
@@ -7272,22 +7289,27 @@
do {
dip->bytes += bvec->bv_len;
bvec++;
- } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1));
+ } while (bvec <= (dio_bio->bi_io_vec + dio_bio->bi_vcnt - 1));
- dip->disk_bytenr = (u64)bio->bi_sector << 9;
- bio->bi_private = dip;
+ dip->disk_bytenr = (u64)dio_bio->bi_sector << 9;
+ io_bio->bi_private = dip;
dip->errors = 0;
- dip->orig_bio = bio;
+ dip->orig_bio = io_bio;
+ dip->dio_bio = dio_bio;
atomic_set(&dip->pending_bios, 0);
if (write)
- bio->bi_end_io = btrfs_endio_direct_write;
+ io_bio->bi_end_io = btrfs_endio_direct_write;
else
- bio->bi_end_io = btrfs_endio_direct_read;
+ io_bio->bi_end_io = btrfs_endio_direct_read;
ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
if (!ret)
return;
+
+free_io_bio:
+ bio_put(io_bio);
+
free_ordered:
/*
* If this is a write, we need to clean up the reserved space and kill
@@ -7303,7 +7325,7 @@
btrfs_put_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
}
- bio_endio(bio, ret);
+ bio_endio(dio_bio, ret);
}
static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 0740621..0525e13 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1050,7 +1050,7 @@
}
/* put a new bio on the list */
- bio = bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1);
if (!bio)
return -ENOMEM;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index f489e24..79bd479 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1296,7 +1296,7 @@
}
WARN_ON(!page->page);
- bio = bio_alloc(GFP_NOFS, 1);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio) {
page->io_error = 1;
sblock->no_io_error_seen = 0;
@@ -1431,7 +1431,7 @@
return -EIO;
}
- bio = bio_alloc(GFP_NOFS, 1);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio)
return -EIO;
bio->bi_bdev = page_bad->dev->bdev;
@@ -1522,7 +1522,7 @@
sbio->dev = wr_ctx->tgtdev;
bio = sbio->bio;
if (!bio) {
- bio = bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
if (!bio) {
mutex_unlock(&wr_ctx->wr_lock);
return -ENOMEM;
@@ -1930,7 +1930,7 @@
sbio->dev = spage->dev;
bio = sbio->bio;
if (!bio) {
- bio = bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
if (!bio)
return -ENOMEM;
sbio->bio = bio;
@@ -3307,7 +3307,7 @@
"btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
return -EIO;
}
- bio = bio_alloc(GFP_NOFS, 1);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio) {
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 062e930..2494008 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5018,42 +5018,16 @@
return 0;
}
-static void *merge_stripe_index_into_bio_private(void *bi_private,
- unsigned int stripe_index)
-{
- /*
- * with single, dup, RAID0, RAID1 and RAID10, stripe_index is
- * at most 1.
- * The alternative solution (instead of stealing bits from the
- * pointer) would be to allocate an intermediate structure
- * that contains the old private pointer plus the stripe_index.
- */
- BUG_ON((((uintptr_t)bi_private) & 3) != 0);
- BUG_ON(stripe_index > 3);
- return (void *)(((uintptr_t)bi_private) | stripe_index);
-}
-
-static struct btrfs_bio *extract_bbio_from_bio_private(void *bi_private)
-{
- return (struct btrfs_bio *)(((uintptr_t)bi_private) & ~((uintptr_t)3));
-}
-
-static unsigned int extract_stripe_index_from_bio_private(void *bi_private)
-{
- return (unsigned int)((uintptr_t)bi_private) & 3;
-}
-
static void btrfs_end_bio(struct bio *bio, int err)
{
- struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private);
+ struct btrfs_bio *bbio = bio->bi_private;
int is_orig_bio = 0;
if (err) {
atomic_inc(&bbio->error);
if (err == -EIO || err == -EREMOTEIO) {
unsigned int stripe_index =
- extract_stripe_index_from_bio_private(
- bio->bi_private);
+ btrfs_io_bio(bio)->stripe_index;
struct btrfs_device *dev;
BUG_ON(stripe_index >= bbio->num_stripes);
@@ -5083,8 +5057,7 @@
}
bio->bi_private = bbio->private;
bio->bi_end_io = bbio->end_io;
- bio->bi_bdev = (struct block_device *)
- (unsigned long)bbio->mirror_num;
+ btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
/* only send an error to the higher layers if it is
* beyond the tolerance of the btrfs bio
*/
@@ -5210,8 +5183,7 @@
struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
bio->bi_private = bbio;
- bio->bi_private = merge_stripe_index_into_bio_private(
- bio->bi_private, (unsigned int)dev_nr);
+ btrfs_io_bio(bio)->stripe_index = dev_nr;
bio->bi_end_io = btrfs_end_bio;
bio->bi_sector = physical >> 9;
#ifdef DEBUG
@@ -5272,8 +5244,7 @@
if (atomic_dec_and_test(&bbio->stripes_pending)) {
bio->bi_private = bbio->private;
bio->bi_end_io = bbio->end_io;
- bio->bi_bdev = (struct block_device *)
- (unsigned long)bbio->mirror_num;
+ btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
bio->bi_sector = logical >> 9;
kfree(bbio);
bio_endio(bio, -EIO);
@@ -5351,7 +5322,7 @@
}
if (dev_nr < total_devs - 1) {
- bio = bio_clone(first_bio, GFP_NOFS);
+ bio = btrfs_bio_clone(first_bio, GFP_NOFS);
BUG_ON(!bio); /* -ENOMEM */
} else {
bio = first_bio;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 845ccbb..f6247e2 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -152,6 +152,26 @@
int rotating;
};
+/*
+ * we need the mirror number and stripe index to be passed around
+ * the call chain while we are processing end_io (especially errors).
+ * Really, what we need is a btrfs_bio structure that has this info
+ * and is properly sized with its stripe array, but we're not there
+ * quite yet. We have our own btrfs bioset, and all of the bios
+ * we allocate are actually btrfs_io_bios. We'll cram as much of
+ * struct btrfs_bio as we can into this over time.
+ */
+struct btrfs_io_bio {
+ unsigned long mirror_num;
+ unsigned long stripe_index;
+ struct bio bio;
+};
+
+static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio)
+{
+ return container_of(bio, struct btrfs_io_bio, bio);
+}
+
struct btrfs_bio_stripe {
struct btrfs_device *dev;
u64 physical;