Btrfs: add mount -o ssd_spread to spread allocations out
Some SSDs perform best when reusing block numbers often, while
others perform much better when clustering is strict and only
big chunks of completely unused space are allocated.
The default mount -o ssd will find rough groupings of blocks
where there are a bunch of free blocks that might have some
allocated blocks mixed in.
mount -o ssd_spread will make sure there are no allocated blocks
mixed into the cluster. It should perform better on lower-end SSDs.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ce3ab4e..b9d8788 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1100,6 +1100,7 @@
#define BTRFS_MOUNT_COMPRESS (1 << 5)
#define BTRFS_MOUNT_NOTREELOG (1 << 6)
#define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7)
+#define BTRFS_MOUNT_SSD_SPREAD (1 << 8)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a42419c..3355d7e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3607,7 +3607,7 @@
last_ptr_loop = 0;
/* allocate a cluster in this block group */
- ret = btrfs_find_space_cluster(trans,
+ ret = btrfs_find_space_cluster(trans, root,
block_group, last_ptr,
offset, num_bytes,
empty_cluster + empty_size);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index ac23476..4538e48 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -579,6 +579,7 @@
* it returns -enospc
*/
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster,
u64 offset, u64 bytes, u64 empty_size)
@@ -595,7 +596,9 @@
int ret;
/* for metadata, allow allocates with more holes */
- if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
+ if (btrfs_test_opt(root, SSD_SPREAD)) {
+ min_bytes = bytes + empty_size;
+ } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
/*
* we want to do larger allocations when we are
* flushing out the delayed refs, it helps prevent
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index ab0bdc0..266fb87 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -31,6 +31,7 @@
u64 bytes);
u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group);
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster,
u64 offset, u64 bytes, u64 empty_size);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 22855a1..7f5b288 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -66,8 +66,8 @@
enum {
Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
- Opt_ssd, Opt_nossd, Opt_thread_pool, Opt_noacl, Opt_compress,
- Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_err,
+ Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl,
+ Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_err,
};
static match_table_t tokens = {
@@ -83,6 +83,7 @@
{Opt_thread_pool, "thread_pool=%d"},
{Opt_compress, "compress"},
{Opt_ssd, "ssd"},
+ {Opt_ssd_spread, "ssd_spread"},
{Opt_nossd, "nossd"},
{Opt_noacl, "noacl"},
{Opt_notreelog, "notreelog"},
@@ -174,9 +175,17 @@
printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
btrfs_set_opt(info->mount_opt, SSD);
break;
+ case Opt_ssd_spread:
+ printk(KERN_INFO "btrfs: use spread ssd "
+ "allocation scheme\n");
+ btrfs_set_opt(info->mount_opt, SSD);
+ btrfs_set_opt(info->mount_opt, SSD_SPREAD);
+ break;
case Opt_nossd:
- printk(KERN_INFO "btrfs: not using ssd allocation scheme\n");
+ printk(KERN_INFO "btrfs: not using ssd allocation "
+ "scheme\n");
btrfs_clear_opt(info->mount_opt, SSD);
+ btrfs_clear_opt(info->mount_opt, SSD_SPREAD);
break;
case Opt_nobarrier:
printk(KERN_INFO "btrfs: turning off barriers\n");
@@ -429,7 +438,9 @@
seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
if (btrfs_test_opt(root, COMPRESS))
seq_puts(seq, ",compress");
- if (btrfs_test_opt(root, SSD))
+ if (btrfs_test_opt(root, SSD_SPREAD))
+ seq_puts(seq, ",ssd_spread");
+ else if (btrfs_test_opt(root, SSD))
seq_puts(seq, ",ssd");
if (btrfs_test_opt(root, NOTREELOG))
seq_puts(seq, ",notreelog");