f2fs: updates on v4.16-rc1
Pull f2fs updates from Jaegeuk Kim:
"In this round, we've followed up to support some generic features such
as cgroup, block reservation, linking fscrypt_ops, delivering
write_hints, and some ioctls. And, we could fix some corner cases in
terms of power-cut recovery and subtle deadlocks.
Enhancements:
- bitmap operations to handle NAT blocks
- readahead to improve readdir speed
- switch to use fscrypt_*
- apply write hints for direct IO
- add reserve_root=%u,resuid=%u,resgid=%u to reserve blocks for root/uid/gid
- modify b_avail and b_free to consider root reserved blocks
- support cgroup writeback
- support FIEMAP_FLAG_XATTR for fibmap
- add F2FS_IOC_PRECACHE_EXTENTS to pre-cache extents
- add F2FS_IOC_{GET/SET}_PIN_FILE to pin LBAs for data blocks
- support inode creation time
Bug fixs:
- sysfile-based quota operations
- memory footprint accounting
- allow to write data on partial preallocation case
- fix deadlock case on fallocate
- fix to handle fill_super errors
- fix missing inode updates of fsync'ed file
- recover renamed file which was fsycn'ed before
- drop inmemory pages in corner error case
- keep last_disk_size correctly
- recover missing i_inline flags during roll-forward
Various clean-up patches were added as well"
Cherry-pick from origin/upstream-f2fs-stable-linux-4.9.y:
71f8f0499e8b f2fs: support inode creation time
58dc6f6fcef7 f2fs: rebuild sit page from sit info in mem
6393cef3f112 f2fs: stop issuing discard if fs is readonly
742bc90e88fa f2fs: clean up duplicated assignment in init_discard_policy
cfabb6edfbc2 f2fs: use GFP_F2FS_ZERO for cleanup
111e8456a697 f2fs: allow to recover node blocks given updated checkpoint
36e041a57ccf f2fs: recover some i_inline flags
3127a7b67ca8 f2fs: correct removexattr behavior for null valued extended attribute
86f78c1e5523 f2fs: drop page cache after fs shutdown
1a3b0047597c f2fs: stop gc/discard thread after fs shutdown
62a91a5a489f f2fs: hanlde error case in f2fs_ioc_shutdown
66356ee5f94c f2fs: split need_inplace_update
5912fbae9da5 f2fs: fix to update last_disk_size correctly
3aa46e2c2187 f2fs: kill F2FS_INLINE_XATTR_ADDRS for cleanup
acdaca27aacb f2fs: clean up error path of fill_super
cf8821115c54 f2fs: avoid hungtask when GC encrypted block if io_bits is set
4be98c9805e4 f2fs: allow quota to use reserved blocks
2a6489c87ee0 f2fs: fix to drop all inmem pages correctly
fd214422395f f2fs: speed up defragment on sparse file
6bce96329c85 f2fs: support F2FS_IOC_PRECACHE_EXTENTS
9ce3d6bb6883 f2fs: add an ioctl to disable GC for specific file
9ef5e6568449 f2fs: prevent newly created inode from being dirtied incorrectly
08ddb1917e04 f2fs: support FIEMAP_FLAG_XATTR
aa9c1c1046e0 f2fs: fix to cover f2fs_inline_data_fiemap with inode_lock
92b8f9c726ef f2fs: check node page again in write end io
4992a3ca15b3 f2fs: fix to caclulate required free section correctly
d1a6b4f6c958 f2fs: handle newly created page when revoking inmem pages
462d762b205a f2fs: add resgid and resuid to reserve root blocks
cbd5e5af8cac f2fs: implement cgroup writeback support
5a5847421d31 f2fs: remove unused pend_list_tag
37d4ca7cd1c1 f2fs: avoid high cpu usage in discard thread
02cfdab8344f f2fs: make local functions static
5fee54098565 f2fs: add reserved blocks for root user
265974636ae0 f2fs: check segment type in __f2fs_replace_block
4f76d6acc6ff f2fs: update inode info to inode page for new file
52b452817452 f2fs: show precise # of blocks that user/root can use
ae0e1fa5a816 f2fs: clean up unneeded declaration
8fc74466298f f2fs: continue to do direct IO if we only preallocate partial blocks
162464df894e f2fs: enable quota at remount from r to w
e270976ff848 f2fs: skip stop_checkpoint for user data writes
d04736926fa7 f2fs: fix missing error number for xattr operation
211cb7bb2428 f2fs: recover directory operations by fsync
2648e735ffe5 f2fs: return error during fill_super
e2a0518d8c24 f2fs: fix an error case of missing update inode page
bf1750bafe86 f2fs: fix potential hangtask in f2fs_trace_pid
c804fcf3df1f f2fs: no need return value in restore summary process
fdd41a8793ad f2fs: use unlikely for release case
a74690b03e24 f2fs: don't return value in truncate_data_blocks_range
987892cc67aa f2fs: clean up f2fs_map_blocks
d7714cb2319a f2fs: clean up hash codes
e3d2a1e946df f2fs: fix error handling in fill_super
b02e72d2942c f2fs: spread f2fs_k{m,z}alloc
ead5259de34d f2fs: inject fault to kvmalloc
e585ca29dd7e f2fs: inject fault to kzalloc
8234ed56e748 f2fs: remove a redundant conditional expression
1a9d6a9c0046 f2fs: apply write hints to select the type of segment for direct write
955e7f58f67b f2fs: switch to fscrypt_prepare_setattr()
268c7f607cb8 f2fs: switch to fscrypt_prepare_lookup()
8dfa646f972c f2fs: switch to fscrypt_prepare_rename()
d5382ccb020a f2fs: switch to fscrypt_prepare_link()
3ccc177c9b8b f2fs: switch to fscrypt_file_open()
8b5674efdc35 f2fs: remove repeated f2fs_bug_on
ba4556cdf10c f2fs: remove an excess variable
46accc925145 f2fs: fix lock dependency in between dio_rwsem & i_mmap_sem
8933908c4f93 f2fs: remove unused parameter
76b6e8ed2058 f2fs: still write data if preallocate only partial blocks
1ed753392fe7 f2fs: introduce sysfs readdir_ra to readahead inode block in readdir
4e68a15eeebc f2fs: fix concurrent problem for updating free bitmap
9be6e7596232 f2fs: remove unneeded memory footprint accounting
923df752db37 f2fs: no need to read nat block if nat_block_bitmap is set
09234be262cb f2fs: reserve nid resource for quota sysfile
Signed-off-by: Jaegeuk Kim <jaegeuk@google.com>
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8f364c5..1692c26 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -43,6 +43,7 @@
char *fault_name[FAULT_MAX] = {
[FAULT_KMALLOC] = "kmalloc",
+ [FAULT_KVMALLOC] = "kvmalloc",
[FAULT_PAGE_ALLOC] = "page alloc",
[FAULT_PAGE_GET] = "page get",
[FAULT_ALLOC_BIO] = "alloc bio",
@@ -106,6 +107,9 @@
Opt_noextent_cache,
Opt_noinline_data,
Opt_data_flush,
+ Opt_reserve_root,
+ Opt_resgid,
+ Opt_resuid,
Opt_mode,
Opt_io_size_bits,
Opt_fault_injection,
@@ -156,6 +160,9 @@
{Opt_noextent_cache, "noextent_cache"},
{Opt_noinline_data, "noinline_data"},
{Opt_data_flush, "data_flush"},
+ {Opt_reserve_root, "reserve_root=%u"},
+ {Opt_resgid, "resgid=%u"},
+ {Opt_resuid, "resuid=%u"},
{Opt_mode, "mode=%s"},
{Opt_io_size_bits, "io_bits=%u"},
{Opt_fault_injection, "fault_injection=%u"},
@@ -190,6 +197,28 @@
va_end(args);
}
+static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
+{
+ block_t limit = (sbi->user_block_count << 1) / 1000;
+
+ /* limit is 0.2% */
+ if (test_opt(sbi, RESERVE_ROOT) && sbi->root_reserved_blocks > limit) {
+ sbi->root_reserved_blocks = limit;
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "Reduce reserved blocks for root = %u",
+ sbi->root_reserved_blocks);
+ }
+ if (!test_opt(sbi, RESERVE_ROOT) &&
+ (!uid_eq(sbi->s_resuid,
+ make_kuid(&init_user_ns, F2FS_DEF_RESUID)) ||
+ !gid_eq(sbi->s_resgid,
+ make_kgid(&init_user_ns, F2FS_DEF_RESGID))))
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root",
+ from_kuid_munged(&init_user_ns, sbi->s_resuid),
+ from_kgid_munged(&init_user_ns, sbi->s_resgid));
+}
+
static void init_once(void *foo)
{
struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo;
@@ -320,6 +349,8 @@
substring_t args[MAX_OPT_ARGS];
char *p, *name;
int arg = 0;
+ kuid_t uid;
+ kgid_t gid;
#ifdef CONFIG_QUOTA
int ret;
#endif
@@ -487,6 +518,40 @@
case Opt_data_flush:
set_opt(sbi, DATA_FLUSH);
break;
+ case Opt_reserve_root:
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+ if (test_opt(sbi, RESERVE_ROOT)) {
+ f2fs_msg(sb, KERN_INFO,
+ "Preserve previous reserve_root=%u",
+ sbi->root_reserved_blocks);
+ } else {
+ sbi->root_reserved_blocks = arg;
+ set_opt(sbi, RESERVE_ROOT);
+ }
+ break;
+ case Opt_resuid:
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+ uid = make_kuid(current_user_ns(), arg);
+ if (!uid_valid(uid)) {
+ f2fs_msg(sb, KERN_ERR,
+ "Invalid uid value %d", arg);
+ return -EINVAL;
+ }
+ sbi->s_resuid = uid;
+ break;
+ case Opt_resgid:
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+ gid = make_kgid(current_user_ns(), arg);
+ if (!gid_valid(gid)) {
+ f2fs_msg(sb, KERN_ERR,
+ "Invalid gid value %d", arg);
+ return -EINVAL;
+ }
+ sbi->s_resgid = gid;
+ break;
case Opt_mode:
name = match_strdup(&args[0]);
@@ -993,22 +1058,25 @@
struct super_block *sb = dentry->d_sb;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
- block_t total_count, user_block_count, start_count, ovp_count;
+ block_t total_count, user_block_count, start_count;
u64 avail_node_count;
total_count = le64_to_cpu(sbi->raw_super->block_count);
user_block_count = sbi->user_block_count;
start_count = le32_to_cpu(sbi->raw_super->segment0_blkaddr);
- ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
buf->f_type = F2FS_SUPER_MAGIC;
buf->f_bsize = sbi->blocksize;
buf->f_blocks = total_count - start_count;
- buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count;
- buf->f_bavail = user_block_count - valid_user_blocks(sbi) -
+ buf->f_bfree = user_block_count - valid_user_blocks(sbi) -
sbi->current_reserved_blocks;
+ if (buf->f_bfree > sbi->root_reserved_blocks)
+ buf->f_bavail = buf->f_bfree - sbi->root_reserved_blocks;
+ else
+ buf->f_bavail = 0;
- avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
+ avail_node_count = sbi->total_node_count - sbi->nquota_files -
+ F2FS_RESERVED_NODE_NUM;
if (avail_node_count > user_block_count) {
buf->f_files = user_block_count;
@@ -1134,6 +1202,11 @@
else if (test_opt(sbi, LFS))
seq_puts(seq, "lfs");
seq_printf(seq, ",active_logs=%u", sbi->active_logs);
+ if (test_opt(sbi, RESERVE_ROOT))
+ seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u",
+ sbi->root_reserved_blocks,
+ from_kuid_munged(&init_user_ns, sbi->s_resuid),
+ from_kgid_munged(&init_user_ns, sbi->s_resgid));
if (F2FS_IO_SIZE_BITS(sbi))
seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
#ifdef CONFIG_F2FS_FAULT_INJECTION
@@ -1263,7 +1336,7 @@
err = dquot_suspend(sb, -1);
if (err < 0)
goto restore_opts;
- } else {
+ } else if (f2fs_readonly(sb) && !(*flags & MS_RDONLY)) {
/* dquot_resume needs RW */
sb->s_flags &= ~MS_RDONLY;
if (sb_any_quota_suspended(sb)) {
@@ -1332,6 +1405,7 @@
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
(test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
+ limit_reserve_root(sbi);
return 0;
restore_gc:
if (need_restart_gc) {
@@ -1656,7 +1730,7 @@
f2fs_quota_off(sb, type);
}
-int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
+static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
{
*projid = F2FS_I(inode)->i_projid;
return 0;
@@ -2148,14 +2222,15 @@
if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
FDEV(devi).nr_blkz++;
- FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL);
+ FDEV(devi).blkz_type = f2fs_kmalloc(sbi, FDEV(devi).nr_blkz,
+ GFP_KERNEL);
if (!FDEV(devi).blkz_type)
return -ENOMEM;
#define F2FS_REPORT_NR_ZONES 4096
- zones = kcalloc(F2FS_REPORT_NR_ZONES, sizeof(struct blk_zone),
- GFP_KERNEL);
+ zones = f2fs_kzalloc(sbi, sizeof(struct blk_zone) *
+ F2FS_REPORT_NR_ZONES, GFP_KERNEL);
if (!zones)
return -ENOMEM;
@@ -2299,8 +2374,8 @@
* Initialize multiple devices information, or single
* zoned block device information.
*/
- sbi->devs = kcalloc(max_devices, sizeof(struct f2fs_dev_info),
- GFP_KERNEL);
+ sbi->devs = f2fs_kzalloc(sbi, sizeof(struct f2fs_dev_info) *
+ max_devices, GFP_KERNEL);
if (!sbi->devs)
return -ENOMEM;
@@ -2423,6 +2498,9 @@
sb->s_fs_info = sbi;
sbi->raw_super = raw_super;
+ sbi->s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
+ sbi->s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID);
+
/* precompute checksum seed for metadata */
if (f2fs_sb_has_inode_chksum(sb))
sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid,
@@ -2466,6 +2544,13 @@
else
sb->s_qcop = &f2fs_quotactl_ops;
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
+
+ if (f2fs_sb_has_quota_ino(sbi->sb)) {
+ for (i = 0; i < MAXQUOTAS; i++) {
+ if (f2fs_qf_ino(sbi->sb, i))
+ sbi->nquota_files++;
+ }
+ }
#endif
sb->s_op = &f2fs_sops;
@@ -2479,6 +2564,7 @@
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
(test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
+ sb->s_iflags |= SB_I_CGROUPWB;
/* init f2fs-specific super block info */
sbi->valid_super_block = valid_super_block;
@@ -2499,8 +2585,9 @@
int n = (i == META) ? 1: NR_TEMP_TYPE;
int j;
- sbi->write_io[i] = kmalloc(n * sizeof(struct f2fs_bio_info),
- GFP_KERNEL);
+ sbi->write_io[i] = f2fs_kmalloc(sbi,
+ n * sizeof(struct f2fs_bio_info),
+ GFP_KERNEL);
if (!sbi->write_io[i]) {
err = -ENOMEM;
goto free_options;
@@ -2521,14 +2608,14 @@
err = init_percpu_info(sbi);
if (err)
- goto free_options;
+ goto free_bio_info;
if (F2FS_IO_SIZE(sbi) > 1) {
sbi->write_io_dummy =
mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
if (!sbi->write_io_dummy) {
err = -ENOMEM;
- goto free_options;
+ goto free_percpu;
}
}
@@ -2563,6 +2650,7 @@
sbi->last_valid_block_count = sbi->total_valid_block_count;
sbi->reserved_blocks = 0;
sbi->current_reserved_blocks = 0;
+ limit_reserve_root(sbi);
for (i = 0; i < NR_INODE_TYPE; i++) {
INIT_LIST_HEAD(&sbi->inode_list[i]);
@@ -2608,18 +2696,16 @@
goto free_nm;
}
- f2fs_join_shrinker(sbi);
-
err = f2fs_build_stats(sbi);
if (err)
- goto free_nm;
+ goto free_node_inode;
/* read root inode and dentry */
root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
if (IS_ERR(root)) {
f2fs_msg(sb, KERN_ERR, "Failed to read root inode");
err = PTR_ERR(root);
- goto free_node_inode;
+ goto free_stats;
}
if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
iput(root);
@@ -2715,6 +2801,8 @@
sbi->valid_super_block ? 1 : 2, err);
}
+ f2fs_join_shrinker(sbi);
+
f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx",
cur_cp_version(F2FS_CKPT(sbi)));
f2fs_update_time(sbi, CP_TIME);
@@ -2741,14 +2829,12 @@
free_root_inode:
dput(sb->s_root);
sb->s_root = NULL;
-free_node_inode:
- truncate_inode_pages_final(NODE_MAPPING(sbi));
- mutex_lock(&sbi->umount_mutex);
- release_ino_entry(sbi, true);
- f2fs_leave_shrinker(sbi);
- iput(sbi->node_inode);
- mutex_unlock(&sbi->umount_mutex);
+free_stats:
f2fs_destroy_stats(sbi);
+free_node_inode:
+ release_ino_entry(sbi, true);
+ truncate_inode_pages_final(NODE_MAPPING(sbi));
+ iput(sbi->node_inode);
free_nm:
destroy_node_manager(sbi);
free_sm:
@@ -2761,10 +2847,12 @@
iput(sbi->meta_inode);
free_io_dummy:
mempool_destroy(sbi->write_io_dummy);
-free_options:
+free_percpu:
+ destroy_percpu_info(sbi);
+free_bio_info:
for (i = 0; i < NR_PAGE_TYPE; i++)
kfree(sbi->write_io[i]);
- destroy_percpu_info(sbi);
+free_options:
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++)
kfree(sbi->s_qf_names[i]);