bcache: Track dirty data by stripe
To make background writeback aware of raid5/6 stripes, we first need to
track the amount of dirty data within each stripe - we do this by
breaking up the existing sectors_dirty into per stripe atomic_ts
Signed-off-by: Kent Overstreet <koverstreet@google.com>
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index d099d88..dbddef0 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -437,7 +437,10 @@
/* If nonzero, we're detaching/unregistering from cache set */
atomic_t detaching;
- atomic_long_t sectors_dirty;
+ uint64_t nr_stripes;
+ unsigned stripe_size_bits;
+ atomic_t *stripe_sectors_dirty;
+
unsigned long sectors_dirty_last;
long sectors_dirty_derivative;
@@ -1159,9 +1162,6 @@
/* Forward declarations */
-void bch_writeback_queue(struct cached_dev *);
-void bch_writeback_add(struct cached_dev *, unsigned);
-
void bch_count_io_errors(struct cache *, int, const char *);
void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
int, const char *);
@@ -1224,8 +1224,6 @@
struct cache_set *bch_cache_set_alloc(struct cache_sb *);
void bch_btree_cache_free(struct cache_set *);
int bch_btree_cache_alloc(struct cache_set *);
-void bch_sectors_dirty_init(struct cached_dev *);
-void bch_cached_dev_writeback_init(struct cached_dev *);
void bch_moving_init_cache_set(struct cache_set *);
int bch_cache_allocator_start(struct cache *ca);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 230c3a6..b93cf56 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -24,6 +24,7 @@
#include "btree.h"
#include "debug.h"
#include "request.h"
+#include "writeback.h"
#include <linux/slab.h>
#include <linux/bitops.h>
@@ -1599,14 +1600,14 @@
struct btree_iter *iter,
struct btree_op *op)
{
- void subtract_dirty(struct bkey *k, int sectors)
+ void subtract_dirty(struct bkey *k, uint64_t offset, int sectors)
{
- struct bcache_device *d = b->c->devices[KEY_INODE(k)];
-
- if (KEY_DIRTY(k) && d)
- atomic_long_sub(sectors, &d->sectors_dirty);
+ if (KEY_DIRTY(k))
+ bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
+ offset, -sectors);
}
+ uint64_t old_offset;
unsigned old_size, sectors_found = 0;
while (1) {
@@ -1618,6 +1619,7 @@
if (bkey_cmp(k, &START_KEY(insert)) <= 0)
continue;
+ old_offset = KEY_START(k);
old_size = KEY_SIZE(k);
/*
@@ -1673,7 +1675,7 @@
struct bkey *top;
- subtract_dirty(k, KEY_SIZE(insert));
+ subtract_dirty(k, KEY_START(insert), KEY_SIZE(insert));
if (bkey_written(b, k)) {
/*
@@ -1720,7 +1722,7 @@
}
}
- subtract_dirty(k, old_size - KEY_SIZE(k));
+ subtract_dirty(k, old_offset, old_size - KEY_SIZE(k));
}
check_failed:
@@ -1796,6 +1798,10 @@
insert: shift_keys(b, m, k);
copy: bkey_copy(m, k);
merged:
+ if (KEY_DIRTY(k))
+ bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
+ KEY_START(k), KEY_SIZE(k));
+
bch_check_keys(b, "%u for %s", status, op_type(op));
if (b->level && !KEY_OFFSET(k))
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 6954699..017c95f 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -10,6 +10,7 @@
#include "btree.h"
#include "debug.h"
#include "request.h"
+#include "writeback.h"
#include <linux/cgroup.h>
#include <linux/module.h>
@@ -1044,7 +1045,7 @@
closure_bio_submit(bio, cl, s->d);
} else {
s->op.cache_bio = bio;
- bch_writeback_add(dc, bio_sectors(bio));
+ bch_writeback_add(dc);
}
out:
closure_call(&s->op.cl, bch_insert_data, NULL, cl);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index dbfa1c3..8c73f0c 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -10,6 +10,7 @@
#include "btree.h"
#include "debug.h"
#include "request.h"
+#include "writeback.h"
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
@@ -744,13 +745,35 @@
mempool_destroy(d->unaligned_bvec);
if (d->bio_split)
bioset_free(d->bio_split);
+ if (is_vmalloc_addr(d->stripe_sectors_dirty))
+ vfree(d->stripe_sectors_dirty);
+ else
+ kfree(d->stripe_sectors_dirty);
closure_debug_destroy(&d->cl);
}
-static int bcache_device_init(struct bcache_device *d, unsigned block_size)
+static int bcache_device_init(struct bcache_device *d, unsigned block_size,
+ sector_t sectors)
{
struct request_queue *q;
+ size_t n;
+
+ if (!d->stripe_size_bits)
+ d->stripe_size_bits = 31;
+
+ d->nr_stripes = round_up(sectors, 1 << d->stripe_size_bits) >>
+ d->stripe_size_bits;
+
+ if (!d->nr_stripes || d->nr_stripes > SIZE_MAX / sizeof(atomic_t))
+ return -ENOMEM;
+
+ n = d->nr_stripes * sizeof(atomic_t);
+ d->stripe_sectors_dirty = n < PAGE_SIZE << 6
+ ? kzalloc(n, GFP_KERNEL)
+ : vzalloc(n);
+ if (!d->stripe_sectors_dirty)
+ return -ENOMEM;
if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
!(d->unaligned_bvec = mempool_create_kmalloc_pool(1,
@@ -760,6 +783,7 @@
!(q = blk_alloc_queue(GFP_KERNEL)))
return -ENOMEM;
+ set_capacity(d->disk, sectors);
snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", bcache_minor);
d->disk->major = bcache_major;
@@ -1047,7 +1071,8 @@
hlist_add_head(&io->hash, dc->io_hash + RECENT_IO);
}
- ret = bcache_device_init(&dc->disk, block_size);
+ ret = bcache_device_init(&dc->disk, block_size,
+ dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
if (ret)
return ret;
@@ -1146,11 +1171,10 @@
kobject_init(&d->kobj, &bch_flash_dev_ktype);
- if (bcache_device_init(d, block_bytes(c)))
+ if (bcache_device_init(d, block_bytes(c), u->sectors))
goto err;
bcache_device_attach(d, c, u - c->uuids);
- set_capacity(d->disk, u->sectors);
bch_flash_dev_request_init(d);
add_disk(d->disk);
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index f5c2d86..cf8d91e 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -9,6 +9,7 @@
#include "sysfs.h"
#include "btree.h"
#include "request.h"
+#include "writeback.h"
#include <linux/blkdev.h>
#include <linux/sort.h>
@@ -128,7 +129,7 @@
char derivative[20];
char target[20];
bch_hprint(dirty,
- atomic_long_read(&dc->disk.sectors_dirty) << 9);
+ bcache_dev_sectors_dirty(&dc->disk) << 9);
bch_hprint(derivative, dc->writeback_rate_derivative << 9);
bch_hprint(target, dc->writeback_rate_target << 9);
@@ -144,7 +145,7 @@
}
sysfs_hprint(dirty_data,
- atomic_long_read(&dc->disk.sectors_dirty) << 9);
+ bcache_dev_sectors_dirty(&dc->disk) << 9);
var_printf(sequential_merge, "%i");
var_hprint(sequential_cutoff);
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 553949e..dd81547 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -9,6 +9,7 @@
#include "bcache.h"
#include "btree.h"
#include "debug.h"
+#include "writeback.h"
#include <trace/events/bcache.h>
@@ -38,7 +39,7 @@
int change = 0;
int64_t error;
- int64_t dirty = atomic_long_read(&dc->disk.sectors_dirty);
+ int64_t dirty = bcache_dev_sectors_dirty(&dc->disk);
int64_t derivative = dirty - dc->disk.sectors_dirty_last;
dc->disk.sectors_dirty_last = dirty;
@@ -183,10 +184,8 @@
}
}
-void bch_writeback_add(struct cached_dev *dc, unsigned sectors)
+void bch_writeback_add(struct cached_dev *dc)
{
- atomic_long_add(sectors, &dc->disk.sectors_dirty);
-
if (!atomic_read(&dc->has_dirty) &&
!atomic_xchg(&dc->has_dirty, 1)) {
atomic_inc(&dc->count);
@@ -205,6 +204,34 @@
}
}
+void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode,
+ uint64_t offset, int nr_sectors)
+{
+ struct bcache_device *d = c->devices[inode];
+ unsigned stripe_size, stripe_offset;
+ uint64_t stripe;
+
+ if (!d)
+ return;
+
+ stripe_size = 1 << d->stripe_size_bits;
+ stripe = offset >> d->stripe_size_bits;
+ stripe_offset = offset & (stripe_size - 1);
+
+ while (nr_sectors) {
+ int s = min_t(unsigned, abs(nr_sectors),
+ stripe_size - stripe_offset);
+
+ if (nr_sectors < 0)
+ s = -s;
+
+ atomic_add(s, d->stripe_sectors_dirty + stripe);
+ nr_sectors -= s;
+ stripe_offset = 0;
+ stripe++;
+ }
+}
+
/* Background writeback - IO loop */
static void dirty_io_destructor(struct closure *cl)
@@ -392,8 +419,9 @@
break;
if (KEY_DIRTY(k))
- atomic_long_add(KEY_SIZE(k),
- &dc->disk.sectors_dirty);
+ bcache_dev_sectors_dirty_add(b->c, dc->disk.id,
+ KEY_START(k),
+ KEY_SIZE(k));
} else {
btree(sectors_dirty_init, k, b, op, dc);
if (KEY_INODE(k) > dc->disk.id)
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
new file mode 100644
index 0000000..5ce9771
--- /dev/null
+++ b/drivers/md/bcache/writeback.h
@@ -0,0 +1,21 @@
+#ifndef _BCACHE_WRITEBACK_H
+#define _BCACHE_WRITEBACK_H
+
+static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
+{
+ uint64_t i, ret = 0;
+
+ for (i = 0; i < d->nr_stripes; i++)
+ ret += atomic_read(d->stripe_sectors_dirty + i);
+
+ return ret;
+}
+
+void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int);
+void bch_writeback_queue(struct cached_dev *);
+void bch_writeback_add(struct cached_dev *);
+
+void bch_sectors_dirty_init(struct cached_dev *dc);
+void bch_cached_dev_writeback_init(struct cached_dev *);
+
+#endif