FROMLIST: f2fs: early updates queued for v4.18-rc1
Cherry-picked from:
origin/upstream-f2fs-stable-linux-4.9.y
cea3ae5716a2 ("f2fs: turn down IO priority of discard from background")
0357a0faca5c ("f2fs: don't split checkpoint in fstrim")
41059095317d ("f2fs: issue discard commands proactively in high fs utilization")
3a38cf1525f5 ("f2fs: add fsync_mode=nobarrier for non-atomic files")
8f6a2e7a6669 ("f2fs: let fstrim issue discard commands in lower priority")
Change-Id: Id7bfb0e31103b42b0eb87928d2d6bc34f18f555c
Signed-off-by: Jaegeuk Kim <jaegeuk@google.com>
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 84be1e7..13ed703 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -177,15 +177,12 @@
#define CP_DISCARD 0x00000010
#define CP_TRIMMED 0x00000020
-#define DEF_BATCHED_TRIM_SECTIONS 2048
-#define BATCHED_TRIM_SEGMENTS(sbi) \
- (GET_SEG_FROM_SEC(sbi, SM_I(sbi)->trim_sections))
-#define BATCHED_TRIM_BLOCKS(sbi) \
- (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi)
#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */
+#define DEF_MAX_DISCARD_LEN 512 /* Max. 2MB per discard */
#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */
#define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */
+#define DEF_DISCARD_URGENT_UTIL 80 /* do more discard over 80% */
#define DEF_CP_INTERVAL 60 /* 60 secs */
#define DEF_IDLE_INTERVAL 5 /* 5 secs */
@@ -692,7 +689,8 @@
static inline bool __is_discard_mergeable(struct discard_info *back,
struct discard_info *front)
{
- return back->lstart + back->len == front->lstart;
+ return (back->lstart + back->len == front->lstart) &&
+ (back->len + front->len < DEF_MAX_DISCARD_LEN);
}
static inline bool __is_discard_back_mergeable(struct discard_info *cur,
@@ -1078,6 +1076,7 @@
enum fsync_mode {
FSYNC_MODE_POSIX, /* fsync follows posix semantics */
FSYNC_MODE_STRICT, /* fsync behaves in line with ext4 */
+ FSYNC_MODE_NOBARRIER, /* fsync behaves nobarrier based on posix */
};
#ifdef CONFIG_F2FS_FS_ENCRYPTION
@@ -2802,8 +2801,6 @@
void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free);
void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
-void init_discard_policy(struct discard_policy *dpolicy, int discard_type,
- unsigned int granularity);
void drop_discard_cmd(struct f2fs_sb_info *sbi);
void stop_discard_thread(struct f2fs_sb_info *sbi);
bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 0e39c77..44a2e32 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -308,7 +308,7 @@
remove_ino_entry(sbi, ino, APPEND_INO);
clear_inode_flag(inode, FI_APPEND_WRITE);
flush_out:
- if (!atomic)
+ if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER)
ret = f2fs_issue_flush(sbi, inode->i_ino);
if (!ret) {
remove_ino_entry(sbi, ino, UPDATE_INO);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index bdf567a..98fe1ed 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -915,6 +915,39 @@
#endif
}
+static void __init_discard_policy(struct f2fs_sb_info *sbi,
+ struct discard_policy *dpolicy,
+ int discard_type, unsigned int granularity)
+{
+ /* common policy */
+ dpolicy->type = discard_type;
+ dpolicy->sync = true;
+ dpolicy->granularity = granularity;
+
+ dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
+ dpolicy->io_aware_gran = MAX_PLIST_NUM;
+
+ if (discard_type == DPOLICY_BG) {
+ dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+ dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
+ dpolicy->io_aware = true;
+ dpolicy->sync = false;
+ if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
+ dpolicy->granularity = 1;
+ dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+ }
+ } else if (discard_type == DPOLICY_FORCE) {
+ dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+ dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
+ dpolicy->io_aware = false;
+ } else if (discard_type == DPOLICY_FSTRIM) {
+ dpolicy->io_aware = false;
+ } else if (discard_type == DPOLICY_UMOUNT) {
+ dpolicy->io_aware = false;
+ }
+}
+
+
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy,
@@ -1130,68 +1163,6 @@
return 0;
}
-static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
- struct discard_policy *dpolicy,
- unsigned int start, unsigned int end)
-{
- struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
- struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
- struct rb_node **insert_p = NULL, *insert_parent = NULL;
- struct discard_cmd *dc;
- struct blk_plug plug;
- int issued;
-
-next:
- issued = 0;
-
- mutex_lock(&dcc->cmd_lock);
- f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
-
- dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
- NULL, start,
- (struct rb_entry **)&prev_dc,
- (struct rb_entry **)&next_dc,
- &insert_p, &insert_parent, true);
- if (!dc)
- dc = next_dc;
-
- blk_start_plug(&plug);
-
- while (dc && dc->lstart <= end) {
- struct rb_node *node;
-
- if (dc->len < dpolicy->granularity)
- goto skip;
-
- if (dc->state != D_PREP) {
- list_move_tail(&dc->list, &dcc->fstrim_list);
- goto skip;
- }
-
- __submit_discard_cmd(sbi, dpolicy, dc);
-
- if (++issued >= dpolicy->max_requests) {
- start = dc->lstart + dc->len;
-
- blk_finish_plug(&plug);
- mutex_unlock(&dcc->cmd_lock);
-
- schedule();
-
- goto next;
- }
-skip:
- node = rb_next(&dc->rb_node);
- dc = rb_entry_safe(node, struct discard_cmd, rb_node);
-
- if (fatal_signal_pending(current))
- break;
- }
-
- blk_finish_plug(&plug);
- mutex_unlock(&dcc->cmd_lock);
-}
-
static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy)
{
@@ -1332,7 +1303,18 @@
static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy)
{
- __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
+ struct discard_policy dp;
+
+ if (dpolicy) {
+ __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
+ return;
+ }
+
+ /* wait all */
+ __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
+ __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
+ __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
+ __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
}
/* This should be covered by global mutex, &sit_i->sentry_lock */
@@ -1377,11 +1359,13 @@
struct discard_policy dpolicy;
bool dropped;
- init_discard_policy(&dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity);
+ __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
+ dcc->discard_granularity);
__issue_discard_cmd(sbi, &dpolicy);
dropped = __drop_discard_cmd(sbi);
- __wait_all_discard_cmd(sbi, &dpolicy);
+ /* just to make sure there is no pending discard commands */
+ __wait_all_discard_cmd(sbi, NULL);
return dropped;
}
@@ -1397,7 +1381,7 @@
set_freezable();
do {
- init_discard_policy(&dpolicy, DPOLICY_BG,
+ __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
dcc->discard_granularity);
wait_event_interruptible_timeout(*q,
@@ -1415,7 +1399,7 @@
dcc->discard_wake = 0;
if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
- init_discard_policy(&dpolicy, DPOLICY_FORCE, 1);
+ __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
sb_start_intwrite(sbi->sb);
@@ -1708,32 +1692,6 @@
wake_up_discard_thread(sbi, false);
}
-void init_discard_policy(struct discard_policy *dpolicy,
- int discard_type, unsigned int granularity)
-{
- /* common policy */
- dpolicy->type = discard_type;
- dpolicy->sync = true;
- dpolicy->granularity = granularity;
-
- dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
- dpolicy->io_aware_gran = MAX_PLIST_NUM;
-
- if (discard_type == DPOLICY_BG) {
- dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
- dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
- dpolicy->io_aware = true;
- } else if (discard_type == DPOLICY_FORCE) {
- dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
- dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
- dpolicy->io_aware = false;
- } else if (discard_type == DPOLICY_FSTRIM) {
- dpolicy->io_aware = false;
- } else if (discard_type == DPOLICY_UMOUNT) {
- dpolicy->io_aware = false;
- }
-}
-
static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
{
dev_t dev = sbi->sb->s_bdev->bd_dev;
@@ -2373,11 +2331,72 @@
return has_candidate;
}
+static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
+ struct discard_policy *dpolicy,
+ unsigned int start, unsigned int end)
+{
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
+ struct rb_node **insert_p = NULL, *insert_parent = NULL;
+ struct discard_cmd *dc;
+ struct blk_plug plug;
+ int issued;
+
+next:
+ issued = 0;
+
+ mutex_lock(&dcc->cmd_lock);
+ f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
+
+ dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
+ NULL, start,
+ (struct rb_entry **)&prev_dc,
+ (struct rb_entry **)&next_dc,
+ &insert_p, &insert_parent, true);
+ if (!dc)
+ dc = next_dc;
+
+ blk_start_plug(&plug);
+
+ while (dc && dc->lstart <= end) {
+ struct rb_node *node;
+
+ if (dc->len < dpolicy->granularity)
+ goto skip;
+
+ if (dc->state != D_PREP) {
+ list_move_tail(&dc->list, &dcc->fstrim_list);
+ goto skip;
+ }
+
+ __submit_discard_cmd(sbi, dpolicy, dc);
+
+ if (++issued >= dpolicy->max_requests) {
+ start = dc->lstart + dc->len;
+
+ blk_finish_plug(&plug);
+ mutex_unlock(&dcc->cmd_lock);
+ __wait_all_discard_cmd(sbi, NULL);
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ goto next;
+ }
+skip:
+ node = rb_next(&dc->rb_node);
+ dc = rb_entry_safe(node, struct discard_cmd, rb_node);
+
+ if (fatal_signal_pending(current))
+ break;
+ }
+
+ blk_finish_plug(&plug);
+ mutex_unlock(&dcc->cmd_lock);
+}
+
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
{
__u64 start = F2FS_BYTES_TO_BLK(range->start);
__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
- unsigned int start_segno, end_segno, cur_segno;
+ unsigned int start_segno, end_segno;
block_t start_block, end_block;
struct cp_control cpc;
struct discard_policy dpolicy;
@@ -2403,40 +2422,27 @@
cpc.reason = CP_DISCARD;
cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
+ cpc.trim_start = start_segno;
+ cpc.trim_end = end_segno;
- /* do checkpoint to issue discard commands safely */
- for (cur_segno = start_segno; cur_segno <= end_segno;
- cur_segno = cpc.trim_end + 1) {
- cpc.trim_start = cur_segno;
+ if (sbi->discard_blks == 0)
+ goto out;
- if (sbi->discard_blks == 0)
- break;
- else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi))
- cpc.trim_end = end_segno;
- else
- cpc.trim_end = min_t(unsigned int,
- rounddown(cur_segno +
- BATCHED_TRIM_SEGMENTS(sbi),
- sbi->segs_per_sec) - 1, end_segno);
-
- mutex_lock(&sbi->gc_mutex);
- err = write_checkpoint(sbi, &cpc);
- mutex_unlock(&sbi->gc_mutex);
- if (err)
- break;
-
- schedule();
- }
+ mutex_lock(&sbi->gc_mutex);
+ err = write_checkpoint(sbi, &cpc);
+ mutex_unlock(&sbi->gc_mutex);
+ if (err)
+ goto out;
start_block = START_BLOCK(sbi, start_segno);
- end_block = START_BLOCK(sbi, min(cur_segno, end_segno) + 1);
+ end_block = START_BLOCK(sbi, end_segno + 1);
- init_discard_policy(&dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
+ __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
__issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block);
trimmed = __wait_discard_cmd_range(sbi, &dpolicy,
start_block, end_block);
-out:
range->len = F2FS_BLK_TO_BYTES(trimmed);
+out:
return err;
}
@@ -3824,8 +3830,6 @@
sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
sm_info->min_ssr_sections = reserved_sections(sbi);
- sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
-
INIT_LIST_HEAD(&sm_info->sit_entry_set);
init_rwsem(&sm_info->curseg_lock);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 7d4621a..2a20e3d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -740,6 +740,10 @@
} else if (strlen(name) == 6 &&
!strncmp(name, "strict", 6)) {
F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_STRICT;
+ } else if (strlen(name) == 9 &&
+ !strncmp(name, "nobarrier", 9)) {
+ F2FS_OPTION(sbi).fsync_mode =
+ FSYNC_MODE_NOBARRIER;
} else {
kfree(name);
return -EINVAL;
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index f33a56d..2c53de9 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -245,6 +245,9 @@
return count;
}
+ if (!strcmp(a->attr.name, "trim_sections"))
+ return -EINVAL;
+
*ui = t;
if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)