Btrfs: add flushoncommit mount option
The 'flushoncommit' mount option forces any data dirtied by a write in a
prior transaction to commit as part of the current commit. This makes
the committed state a fully consistent view of the file system from the
application's perspective (i.e., it includes all completed file system
operations). Previously, this was the behavior only when a snapshot was
being created.
This is used by Ceph to ensure that completed writes make it to the
platter along with the metadata operations they are bound to (by
BTRFS_IOC_TRANS_{START,END}).
Signed-off-by: Sage Weil <sage@newdream.net>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 1e99a99..bb6ac5b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1037,6 +1037,7 @@
#define BTRFS_MOUNT_DEGRADED (1 << 4)
#define BTRFS_MOUNT_COMPRESS (1 << 5)
#define BTRFS_MOUNT_NOTREELOG (1 << 6)
+#define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 3baa2c1..9744af9 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -68,7 +68,7 @@
Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog,
- Opt_err,
+ Opt_flushoncommit, Opt_err,
};
static match_table_t tokens = {
@@ -86,6 +86,7 @@
{Opt_ssd, "ssd"},
{Opt_noacl, "noacl"},
{Opt_notreelog, "notreelog"},
+ {Opt_flushoncommit, "flushoncommit"},
{Opt_err, NULL},
};
@@ -229,6 +230,10 @@
printk(KERN_INFO "btrfs: disabling tree log\n");
btrfs_set_opt(info->mount_opt, NOTREELOG);
break;
+ case Opt_flushoncommit:
+ printk(KERN_INFO "btrfs: turning on flush-on-commit\n");
+ btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT);
+ break;
default:
break;
}
@@ -370,9 +375,8 @@
int btrfs_sync_fs(struct super_block *sb, int wait)
{
struct btrfs_trans_handle *trans;
- struct btrfs_root *root;
+ struct btrfs_root *root = btrfs_sb(sb);
int ret;
- root = btrfs_sb(sb);
if (sb->s_flags & MS_RDONLY)
return 0;
@@ -419,7 +423,10 @@
if (btrfs_test_opt(root, SSD))
seq_puts(seq, ",ssd");
if (btrfs_test_opt(root, NOTREELOG))
seq_puts(seq, ",notreelog");
+ /* report the same spelling the mount option parser accepts */
+ if (btrfs_test_opt(root, FLUSHONCOMMIT))
+ seq_puts(seq, ",flushoncommit");
if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
seq_puts(seq, ",noacl");
return 0;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 3e8225d..2869b33 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -972,6 +972,7 @@
int ret;
int should_grow = 0;
unsigned long now = get_seconds();
+ int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT);
btrfs_run_ordered_operations(root, 0);
@@ -1051,7 +1052,9 @@
mutex_unlock(&root->fs_info->trans_mutex);
- if (snap_pending) {
+ if (flush_on_commit || snap_pending) {
+ if (flush_on_commit)
+ btrfs_start_delalloc_inodes(root);
ret = btrfs_wait_ordered_extents(root, 1);
BUG_ON(ret);
}