Merge tag 'for-f2fs-3.16' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
"In this round, there is no special interesting feature, but we've
investigated a couple of tuning points with respect to the I/O flow.
Several major bug fixes and a bunch of clean-ups also have been made.
This patch-set includes the following major enhancement patches:
- enhance wait_on_page_writeback
- support SEEK_DATA and SEEK_HOLE
- enhance readahead flows
- enhance IO flushes
- support fiemap
- add some tracepoints
The other bug fixes are as follows:
- fix to support a large volume > 2TB correctly
- recovery bug fix wrt fallocated space
- fix recursive lock on xattr operations
- fix some cases on the remount flow
And, there are a bunch of cleanups"
* tag 'for-f2fs-3.16' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (52 commits)
f2fs: support f2fs_fiemap
f2fs: avoid not to call remove_dirty_inode
f2fs: recover fallocated space
f2fs: fix to recover data written by dio
f2fs: large volume support
f2fs: avoid crash when trace f2fs_submit_page_mbio event in ra_sum_pages
f2fs: avoid overflow when large directory feathure is enabled
f2fs: fix recursive lock by f2fs_setxattr
MAINTAINERS: add a co-maintainer from samsung for F2FS
MAINTAINERS: change the email address for f2fs
f2fs: use inode_init_owner() to simplify codes
f2fs: avoid to use slab memory in f2fs_issue_flush for efficiency
f2fs: add a tracepoint for f2fs_read_data_page
f2fs: add a tracepoint for f2fs_write_{meta,node,data}_pages
f2fs: add a tracepoint for f2fs_write_{meta,node,data}_page
f2fs: add a tracepoint for f2fs_write_end
f2fs: add a tracepoint for f2fs_write_begin
f2fs: fix checkpatch warning
f2fs: deactivate inode page if the inode is evicted
f2fs: decrease the lock granularity during write_begin
...
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index 25311e11..51afba1 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -461,11 +461,11 @@
# of blocks in level #n = |
`- 4, Otherwise
- ,- 2^ (n + dir_level),
- | if n < MAX_DIR_HASH_DEPTH / 2,
+ ,- 2^(n + dir_level),
+ | if n + dir_level < MAX_DIR_HASH_DEPTH / 2,
# of buckets in level #n = |
- `- 2^((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1),
- Otherwise
+ `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1),
+ Otherwise
When F2FS finds a file name in a directory, at first a hash value of the file
name is calculated. Then, F2FS scans the hash table in level #0 to find the
diff --git a/MAINTAINERS b/MAINTAINERS
index a1f4b57..9483795 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3792,7 +3792,8 @@
F: include/linux/fscache*.h
F2FS FILE SYSTEM
-M: Jaegeuk Kim <jaegeuk.kim@samsung.com>
+M: Jaegeuk Kim <jaegeuk@kernel.org>
+M: Changman Lee <cm224.lee@samsung.com>
L: linux-f2fs-devel@lists.sourceforge.net
W: http://en.wikipedia.org/wiki/F2FS
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index e93e4ec..dbe2141 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -240,7 +240,7 @@
}
}
- error = f2fs_setxattr(inode, name_index, "", value, size, ipage);
+ error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0);
kfree(value);
if (!error)
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index c405b8f..0b4710c 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -33,12 +33,12 @@
struct address_space *mapping = META_MAPPING(sbi);
struct page *page = NULL;
repeat:
- page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
+ page = grab_cache_page(mapping, index);
if (!page) {
cond_resched();
goto repeat;
}
-
+ f2fs_wait_on_page_writeback(page, META);
SetPageUptodate(page);
return page;
}
@@ -72,7 +72,7 @@
return page;
}
-inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
+static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
{
switch (type) {
case META_NAT:
@@ -154,6 +154,8 @@
struct inode *inode = page->mapping->host;
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ trace_f2fs_writepage(page, META);
+
if (unlikely(sbi->por_doing))
goto redirty_out;
if (wbc->for_reclaim)
@@ -171,10 +173,7 @@
return 0;
redirty_out:
- dec_page_count(sbi, F2FS_DIRTY_META);
- wbc->pages_skipped++;
- account_page_redirty(page);
- set_page_dirty(page);
+ redirty_page_for_writepage(wbc, page);
return AOP_WRITEPAGE_ACTIVATE;
}
@@ -184,6 +183,8 @@
struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
long diff, written;
+ trace_f2fs_writepages(mapping->host, wbc, META);
+
/* collect a number of dirty meta pages and write together */
if (wbc->for_kupdate ||
get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
@@ -367,7 +368,9 @@
return;
sbi->por_doing = true;
- start_blk = __start_cp_addr(sbi) + 1;
+
+ start_blk = __start_cp_addr(sbi) + 1 +
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
orphan_blkaddr = __start_sum_addr(sbi) - 1;
ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);
@@ -508,8 +511,11 @@
unsigned long blk_size = sbi->blocksize;
unsigned long long cp1_version = 0, cp2_version = 0;
unsigned long long cp_start_blk_no;
+ unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
+ block_t cp_blk_no;
+ int i;
- sbi->ckpt = kzalloc(blk_size, GFP_KERNEL);
+ sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
if (!sbi->ckpt)
return -ENOMEM;
/*
@@ -540,6 +546,23 @@
cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
memcpy(sbi->ckpt, cp_block, blk_size);
+ if (cp_blks <= 1)
+ goto done;
+
+ cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
+ if (cur_page == cp2)
+ cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
+
+ for (i = 1; i < cp_blks; i++) {
+ void *sit_bitmap_ptr;
+ unsigned char *ckpt = (unsigned char *)sbi->ckpt;
+
+ cur_page = get_meta_page(sbi, cp_blk_no + i);
+ sit_bitmap_ptr = page_address(cur_page);
+ memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
+ f2fs_put_page(cur_page, 1);
+ }
+done:
f2fs_put_page(cp1, 1);
f2fs_put_page(cp2, 1);
return 0;
@@ -552,14 +575,13 @@
static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- struct list_head *head = &sbi->dir_inode_list;
- struct dir_inode_entry *entry;
- list_for_each_entry(entry, head, list)
- if (unlikely(entry->inode == inode))
- return -EEXIST;
+ if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR))
+ return -EEXIST;
- list_add_tail(&new->list, head);
+ set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
+ F2FS_I(inode)->dirty_dir = new;
+ list_add_tail(&new->list, &sbi->dir_inode_list);
stat_inc_dirty_dir(sbi);
return 0;
}
@@ -608,31 +630,26 @@
void remove_dirty_dir_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- struct list_head *head;
struct dir_inode_entry *entry;
if (!S_ISDIR(inode->i_mode))
return;
spin_lock(&sbi->dir_inode_lock);
- if (get_dirty_dents(inode)) {
+ if (get_dirty_dents(inode) ||
+ !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {
spin_unlock(&sbi->dir_inode_lock);
return;
}
- head = &sbi->dir_inode_list;
- list_for_each_entry(entry, head, list) {
- if (entry->inode == inode) {
- list_del(&entry->list);
- stat_dec_dirty_dir(sbi);
- spin_unlock(&sbi->dir_inode_lock);
- kmem_cache_free(inode_entry_slab, entry);
- goto done;
- }
- }
+ entry = F2FS_I(inode)->dirty_dir;
+ list_del(&entry->list);
+ F2FS_I(inode)->dirty_dir = NULL;
+ clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
+ stat_dec_dirty_dir(sbi);
spin_unlock(&sbi->dir_inode_lock);
+ kmem_cache_free(inode_entry_slab, entry);
-done:
/* Only from the recovery routine */
if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
@@ -640,26 +657,6 @@
}
}
-struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino)
-{
-
- struct list_head *head;
- struct inode *inode = NULL;
- struct dir_inode_entry *entry;
-
- spin_lock(&sbi->dir_inode_lock);
-
- head = &sbi->dir_inode_list;
- list_for_each_entry(entry, head, list) {
- if (entry->inode->i_ino == ino) {
- inode = entry->inode;
- break;
- }
- }
- spin_unlock(&sbi->dir_inode_lock);
- return inode;
-}
-
void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
{
struct list_head *head;
@@ -758,6 +755,13 @@
__u32 crc32 = 0;
void *kaddr;
int i;
+ int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
+
+ /*
+ * This avoids to conduct wrong roll-forward operations and uses
+ * metapages, so should be called prior to sync_meta_pages below.
+ */
+ discard_next_dnode(sbi);
/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META))
@@ -802,16 +806,19 @@
orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1)
/ F2FS_ORPHANS_PER_BLOCK;
- ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks);
+ ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
+ orphan_blocks);
if (is_umount) {
set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
- data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE);
+ cp_payload_blks + data_sum_blocks +
+ orphan_blocks + NR_CURSEG_NODE_TYPE);
} else {
clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
- data_sum_blocks + orphan_blocks);
+ cp_payload_blks + data_sum_blocks +
+ orphan_blocks);
}
if (sbi->n_orphans)
@@ -837,6 +844,15 @@
set_page_dirty(cp_page);
f2fs_put_page(cp_page, 1);
+ for (i = 1; i < 1 + cp_payload_blks; i++) {
+ cp_page = grab_meta_page(sbi, start_blk++);
+ kaddr = page_address(cp_page);
+ memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE,
+ (1 << sbi->log_blocksize));
+ set_page_dirty(cp_page);
+ f2fs_put_page(cp_page, 1);
+ }
+
if (sbi->n_orphans) {
write_orphan_inodes(sbi, start_blk);
start_blk += orphan_blocks;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 45abd60..c1fb6dd 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -417,7 +417,7 @@
if (unlikely(dn.data_blkaddr == NEW_ADDR))
return ERR_PTR(-EINVAL);
- page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
+ page = grab_cache_page(mapping, index);
if (!page)
return ERR_PTR(-ENOMEM);
@@ -455,7 +455,7 @@
int err;
repeat:
- page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
+ page = grab_cache_page(mapping, index);
if (!page)
return ERR_PTR(-ENOMEM);
@@ -652,8 +652,7 @@
goto put_out;
}
- end_offset = IS_INODE(dn.node_page) ?
- ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+ end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
bh_result->b_size = (((size_t)1) << blkbits);
dn.ofs_in_node++;
pgofs++;
@@ -675,8 +674,7 @@
if (dn.data_blkaddr == NEW_ADDR)
goto put_out;
- end_offset = IS_INODE(dn.node_page) ?
- ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+ end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
}
if (maxblocks > (bh_result->b_size >> blkbits)) {
@@ -710,11 +708,19 @@
return err;
}
+int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ u64 start, u64 len)
+{
+ return generic_block_fiemap(inode, fieinfo, start, len, get_data_block);
+}
+
static int f2fs_read_data_page(struct file *file, struct page *page)
{
struct inode *inode = page->mapping->host;
int ret;
+ trace_f2fs_readpage(page, DATA);
+
/* If the file has inline data, try to read it directlly */
if (f2fs_has_inline_data(inode))
ret = f2fs_read_inline_data(inode, page);
@@ -790,6 +796,8 @@
.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
};
+ trace_f2fs_writepage(page, DATA);
+
if (page->index < end_index)
goto write;
@@ -798,10 +806,8 @@
* this page does not have to be written to disk.
*/
offset = i_size & (PAGE_CACHE_SIZE - 1);
- if ((page->index >= end_index + 1) || !offset) {
- inode_dec_dirty_dents(inode);
+ if ((page->index >= end_index + 1) || !offset)
goto out;
- }
zero_user_segment(page, offset, PAGE_CACHE_SIZE);
write:
@@ -810,7 +816,6 @@
/* Dentry blocks are controlled by checkpoint */
if (S_ISDIR(inode->i_mode)) {
- inode_dec_dirty_dents(inode);
err = do_write_data_page(page, &fio);
goto done;
}
@@ -832,15 +837,16 @@
clear_cold_data(page);
out:
+ inode_dec_dirty_dents(inode);
unlock_page(page);
if (need_balance_fs)
f2fs_balance_fs(sbi);
+ if (wbc->for_reclaim)
+ f2fs_submit_merged_bio(sbi, DATA, WRITE);
return 0;
redirty_out:
- wbc->pages_skipped++;
- account_page_redirty(page);
- set_page_dirty(page);
+ redirty_page_for_writepage(wbc, page);
return AOP_WRITEPAGE_ACTIVATE;
}
@@ -862,12 +868,15 @@
int ret;
long diff;
+ trace_f2fs_writepages(mapping->host, wbc, DATA);
+
/* deal with chardevs and other special file */
if (!mapping->a_ops->writepage)
return 0;
if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
- get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA))
+ get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA) &&
+ available_free_memory(sbi, DIRTY_DENTS))
goto skip_write;
diff = nr_pages_to_write(sbi, DATA, wbc);
@@ -903,6 +912,8 @@
struct dnode_of_data dn;
int err = 0;
+ trace_f2fs_write_begin(inode, pos, len, flags);
+
f2fs_balance_fs(sbi);
repeat:
err = f2fs_convert_inline_data(inode, pos + len);
@@ -912,6 +923,10 @@
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
+
+ /* to avoid latency during memory pressure */
+ unlock_page(page);
+
*pagep = page;
if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA)
@@ -923,10 +938,18 @@
f2fs_unlock_op(sbi);
if (err) {
- f2fs_put_page(page, 1);
+ f2fs_put_page(page, 0);
return err;
}
inline_data:
+ lock_page(page);
+ if (unlikely(page->mapping != mapping)) {
+ f2fs_put_page(page, 1);
+ goto repeat;
+ }
+
+ f2fs_wait_on_page_writeback(page, DATA);
+
if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
return 0;
@@ -978,6 +1001,8 @@
{
struct inode *inode = page->mapping->host;
+ trace_f2fs_write_end(inode, pos, len, copied);
+
SetPageUptodate(page);
set_page_dirty(page);
@@ -1022,6 +1047,9 @@
if (check_direct_IO(inode, rw, iov, offset, nr_segs))
return 0;
+ /* clear fsync mark to recover these blocks */
+ fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);
+
return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
get_data_block);
}
@@ -1061,6 +1089,11 @@
static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
+ struct inode *inode = mapping->host;
+
+ if (f2fs_has_inline_data(inode))
+ return 0;
+
return generic_block_bmap(mapping, block, get_data_block);
}
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 972fd0e..966acb0 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -23,10 +23,10 @@
static unsigned int dir_buckets(unsigned int level, int dir_level)
{
- if (level < MAX_DIR_HASH_DEPTH / 2)
+ if (level + dir_level < MAX_DIR_HASH_DEPTH / 2)
return 1 << (level + dir_level);
else
- return 1 << ((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1);
+ return MAX_DIR_BUCKETS;
}
static unsigned int bucket_blocks(unsigned int level)
@@ -268,6 +268,8 @@
{
struct f2fs_inode *ri;
+ f2fs_wait_on_page_writeback(ipage, NODE);
+
/* copy name info. to this inode page */
ri = F2FS_INODE(ipage);
ri->i_namelen = cpu_to_le32(name->len);
@@ -637,11 +639,17 @@
struct f2fs_dentry_block *dentry_blk = NULL;
struct f2fs_dir_entry *de = NULL;
struct page *dentry_page = NULL;
+ struct file_ra_state *ra = &file->f_ra;
unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK);
unsigned char d_type = DT_UNKNOWN;
bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK);
+ /* readahead for multi pages of dir */
+ if (npages - n > 1 && !ra_has_index(ra, n))
+ page_cache_sync_readahead(inode->i_mapping, ra, file, n,
+ min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES));
+
for (; n < npages; n++) {
dentry_page = get_lock_data_page(inode, n);
if (IS_ERR(dentry_page))
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 2ecac83..e51c732 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -182,6 +182,8 @@
#define F2FS_LINK_MAX 32000 /* maximum link count per file */
+#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
+
/* for in-memory extent cache entry */
#define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */
@@ -218,6 +220,7 @@
nid_t i_xattr_nid; /* node id that contains xattrs */
unsigned long long xattr_ver; /* cp version of xattr modification */
struct extent_info ext; /* in-memory extent cache entry */
+ struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */
};
static inline void get_extent_info(struct extent_info *ext,
@@ -243,6 +246,7 @@
struct f2fs_nm_info {
block_t nat_blkaddr; /* base disk address of NAT */
nid_t max_nid; /* maximum possible node ids */
+ nid_t available_nids; /* maximum available node ids */
nid_t next_scan_nid; /* the next nid to be scanned */
unsigned int ram_thresh; /* control the memory footprint */
@@ -323,6 +327,15 @@
int ret;
};
+struct flush_cmd_control {
+ struct task_struct *f2fs_issue_flush; /* flush thread */
+ wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */
+ struct flush_cmd *issue_list; /* list for command issue */
+ struct flush_cmd *dispatch_list; /* list for command dispatch */
+ spinlock_t issue_lock; /* for issue list lock */
+ struct flush_cmd *issue_tail; /* list tail of issue list */
+};
+
struct f2fs_sm_info {
struct sit_info *sit_info; /* whole segment information */
struct free_segmap_info *free_info; /* free segment information */
@@ -353,12 +366,8 @@
unsigned int min_ipu_util; /* in-place-update threshold */
/* for flush command control */
- struct task_struct *f2fs_issue_flush; /* flush thread */
- wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */
- struct flush_cmd *issue_list; /* list for command issue */
- struct flush_cmd *dispatch_list; /* list for command dispatch */
- spinlock_t issue_lock; /* for issue list lock */
- struct flush_cmd *issue_tail; /* list tail of issue list */
+ struct flush_cmd_control *cmd_control_info;
+
};
/*
@@ -755,9 +764,18 @@
static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
- int offset = (flag == NAT_BITMAP) ?
+ int offset;
+
+ if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) {
+ if (flag == NAT_BITMAP)
+ return &ckpt->sit_nat_version_bitmap;
+ else
+ return ((unsigned char *)ckpt + F2FS_BLKSIZE);
+ } else {
+ offset = (flag == NAT_BITMAP) ?
le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
- return &ckpt->sit_nat_version_bitmap + offset;
+ return &ckpt->sit_nat_version_bitmap + offset;
+ }
}
static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
@@ -958,6 +976,7 @@
enum {
FI_NEW_INODE, /* indicate newly allocated inode */
FI_DIRTY_INODE, /* indicate inode is dirty or not */
+ FI_DIRTY_DIR, /* indicate directory has dirty pages */
FI_INC_LINK, /* need to increment i_nlink */
FI_ACL_MODE, /* indicate acl mode */
FI_NO_ALLOC, /* should not allocate any blocks */
@@ -1071,6 +1090,12 @@
((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
(F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
+/* get offset of first page in next direct node */
+#define PGOFS_OF_NEXT_DNODE(pgofs, fi) \
+ ((pgofs < ADDRS_PER_INODE(fi)) ? ADDRS_PER_INODE(fi) : \
+ (pgofs - ADDRS_PER_INODE(fi) + ADDRS_PER_BLOCK) / \
+ ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi))
+
/*
* file.c
*/
@@ -1140,8 +1165,10 @@
struct dnode_of_data;
struct node_info;
+bool available_free_memory(struct f2fs_sb_info *, int);
int is_checkpointed_node(struct f2fs_sb_info *, nid_t);
bool fsync_mark_done(struct f2fs_sb_info *, nid_t);
+void fsync_mark_clear(struct f2fs_sb_info *, nid_t);
void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
int truncate_inode_blocks(struct inode *, pgoff_t);
@@ -1176,9 +1203,12 @@
void f2fs_balance_fs(struct f2fs_sb_info *);
void f2fs_balance_fs_bg(struct f2fs_sb_info *);
int f2fs_issue_flush(struct f2fs_sb_info *);
+int create_flush_cmd_control(struct f2fs_sb_info *);
+void destroy_flush_cmd_control(struct f2fs_sb_info *);
void invalidate_blocks(struct f2fs_sb_info *, block_t);
void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
void clear_prefree_segments(struct f2fs_sb_info *);
+void discard_next_dnode(struct f2fs_sb_info *);
int npages_for_summary_flush(struct f2fs_sb_info *);
void allocate_new_segments(struct f2fs_sb_info *);
struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
@@ -1221,7 +1251,6 @@
void set_dirty_dir_page(struct inode *, struct page *);
void add_dirty_dir_inode(struct inode *);
void remove_dirty_dir_inode(struct inode *);
-struct inode *check_dirty_dir_inode(struct f2fs_sb_info *, nid_t);
void sync_dirty_dir_inodes(struct f2fs_sb_info *);
void write_checkpoint(struct f2fs_sb_info *, bool);
void init_orphan_info(struct f2fs_sb_info *);
@@ -1242,6 +1271,7 @@
struct page *get_lock_data_page(struct inode *, pgoff_t);
struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
int do_write_data_page(struct page *, struct f2fs_io_info *);
+int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
/*
* gc.c
@@ -1391,5 +1421,6 @@
int f2fs_read_inline_data(struct inode *, struct page *);
int f2fs_convert_inline_data(struct inode *, pgoff_t);
int f2fs_write_inline_data(struct inode *, struct page *, unsigned int);
+void truncate_inline_data(struct inode *, u64);
int recover_inline_data(struct inode *, struct page *);
#endif
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 60e7d54..9c49c59 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -19,6 +19,7 @@
#include <linux/compat.h>
#include <linux/uaccess.h>
#include <linux/mount.h>
+#include <linux/pagevec.h>
#include "f2fs.h"
#include "node.h"
@@ -194,6 +195,132 @@
return ret;
}
+static pgoff_t __get_first_dirty_index(struct address_space *mapping,
+ pgoff_t pgofs, int whence)
+{
+ struct pagevec pvec;
+ int nr_pages;
+
+ if (whence != SEEK_DATA)
+ return 0;
+
+ /* find first dirty page index */
+ pagevec_init(&pvec, 0);
+ nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, PAGECACHE_TAG_DIRTY, 1);
+ pgofs = nr_pages ? pvec.pages[0]->index: LONG_MAX;
+ pagevec_release(&pvec);
+ return pgofs;
+}
+
+static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
+ int whence)
+{
+ switch (whence) {
+ case SEEK_DATA:
+ if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
+ (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR))
+ return true;
+ break;
+ case SEEK_HOLE:
+ if (blkaddr == NULL_ADDR)
+ return true;
+ break;
+ }
+ return false;
+}
+
+static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
+{
+ struct inode *inode = file->f_mapping->host;
+ loff_t maxbytes = inode->i_sb->s_maxbytes;
+ struct dnode_of_data dn;
+ pgoff_t pgofs, end_offset, dirty;
+ loff_t data_ofs = offset;
+ loff_t isize;
+ int err = 0;
+
+ mutex_lock(&inode->i_mutex);
+
+ isize = i_size_read(inode);
+ if (offset >= isize)
+ goto fail;
+
+ /* handle inline data case */
+ if (f2fs_has_inline_data(inode)) {
+ if (whence == SEEK_HOLE)
+ data_ofs = isize;
+ goto found;
+ }
+
+ pgofs = (pgoff_t)(offset >> PAGE_CACHE_SHIFT);
+
+ dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence);
+
+ for (; data_ofs < isize; data_ofs = pgofs << PAGE_CACHE_SHIFT) {
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
+ if (err && err != -ENOENT) {
+ goto fail;
+ } else if (err == -ENOENT) {
+ /* direct node is not exist */
+ if (whence == SEEK_DATA) {
+ pgofs = PGOFS_OF_NEXT_DNODE(pgofs,
+ F2FS_I(inode));
+ continue;
+ } else {
+ goto found;
+ }
+ }
+
+ end_offset = IS_INODE(dn.node_page) ?
+ ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+
+ /* find data/hole in dnode block */
+ for (; dn.ofs_in_node < end_offset;
+ dn.ofs_in_node++, pgofs++,
+ data_ofs = pgofs << PAGE_CACHE_SHIFT) {
+ block_t blkaddr;
+ blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+
+ if (__found_offset(blkaddr, dirty, pgofs, whence)) {
+ f2fs_put_dnode(&dn);
+ goto found;
+ }
+ }
+ f2fs_put_dnode(&dn);
+ }
+
+ if (whence == SEEK_DATA)
+ goto fail;
+found:
+ if (whence == SEEK_HOLE && data_ofs > isize)
+ data_ofs = isize;
+ mutex_unlock(&inode->i_mutex);
+ return vfs_setpos(file, data_ofs, maxbytes);
+fail:
+ mutex_unlock(&inode->i_mutex);
+ return -ENXIO;
+}
+
+static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
+{
+ struct inode *inode = file->f_mapping->host;
+ loff_t maxbytes = inode->i_sb->s_maxbytes;
+
+ switch (whence) {
+ case SEEK_SET:
+ case SEEK_CUR:
+ case SEEK_END:
+ return generic_file_llseek_size(file, offset, whence,
+ maxbytes, i_size_read(inode));
+ case SEEK_DATA:
+ case SEEK_HOLE:
+ return f2fs_seek_block(file, offset, whence);
+ }
+
+ return -EINVAL;
+}
+
static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
file_accessed(file);
@@ -242,6 +369,9 @@
unsigned offset = from & (PAGE_CACHE_SIZE - 1);
struct page *page;
+ if (f2fs_has_inline_data(inode))
+ return truncate_inline_data(inode, from);
+
if (!offset)
return;
@@ -288,10 +418,7 @@
return err;
}
- if (IS_INODE(dn.node_page))
- count = ADDRS_PER_INODE(F2FS_I(inode));
- else
- count = ADDRS_PER_BLOCK;
+ count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
count -= dn.ofs_in_node;
f2fs_bug_on(count < 0);
@@ -413,6 +540,7 @@
.listxattr = f2fs_listxattr,
.removexattr = generic_removexattr,
#endif
+ .fiemap = f2fs_fiemap,
};
static void fill_zero(struct inode *inode, pgoff_t index,
@@ -555,6 +683,7 @@
i_size_read(inode) < new_size) {
i_size_write(inode, new_size);
mark_inode_dirty(inode);
+ f2fs_write_inode(inode, NULL);
}
return ret;
@@ -678,7 +807,7 @@
#endif
const struct file_operations f2fs_file_operations = {
- .llseek = generic_file_llseek,
+ .llseek = f2fs_llseek,
.read = do_sync_read,
.write = do_sync_write,
.aio_read = generic_file_aio_read,
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 383db1f..1bba522 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -81,8 +81,10 @@
f2fs_lock_op(sbi);
ipage = get_node_page(sbi, inode->i_ino);
- if (IS_ERR(ipage))
- return PTR_ERR(ipage);
+ if (IS_ERR(ipage)) {
+ err = PTR_ERR(ipage);
+ goto out;
+ }
/*
* i_addr[0] is not used for inline data,
@@ -90,11 +92,10 @@
*/
set_new_dnode(&dn, inode, ipage, NULL, 0);
err = f2fs_reserve_block(&dn, 0);
- if (err) {
- f2fs_unlock_op(sbi);
- return err;
- }
+ if (err)
+ goto out;
+ f2fs_wait_on_page_writeback(page, DATA);
zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
/* Copy the whole inline data block */
@@ -118,6 +119,7 @@
sync_inode_page(&dn);
f2fs_put_dnode(&dn);
+out:
f2fs_unlock_op(sbi);
return err;
}
@@ -132,7 +134,7 @@
else if (to_size <= MAX_INLINE_DATA)
return 0;
- page = grab_cache_page_write_begin(inode->i_mapping, 0, AOP_FLAG_NOFS);
+ page = grab_cache_page(inode->i_mapping, 0);
if (!page)
return -ENOMEM;
@@ -155,6 +157,7 @@
return err;
ipage = dn.inode_page;
+ f2fs_wait_on_page_writeback(ipage, NODE);
zero_user_segment(ipage, INLINE_DATA_OFFSET,
INLINE_DATA_OFFSET + MAX_INLINE_DATA);
src_addr = kmap(page);
@@ -175,6 +178,26 @@
return 0;
}
+void truncate_inline_data(struct inode *inode, u64 from)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+ struct page *ipage;
+
+ if (from >= MAX_INLINE_DATA)
+ return;
+
+ ipage = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage))
+ return;
+
+ f2fs_wait_on_page_writeback(ipage, NODE);
+
+ zero_user_segment(ipage, INLINE_DATA_OFFSET + from,
+ INLINE_DATA_OFFSET + MAX_INLINE_DATA);
+ set_page_dirty(ipage);
+ f2fs_put_page(ipage, 1);
+}
+
int recover_inline_data(struct inode *inode, struct page *npage)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
@@ -199,6 +222,8 @@
ipage = get_node_page(sbi, inode->i_ino);
f2fs_bug_on(IS_ERR(ipage));
+ f2fs_wait_on_page_writeback(ipage, NODE);
+
src_addr = inline_data_addr(npage);
dst_addr = inline_data_addr(ipage);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
@@ -210,6 +235,7 @@
if (f2fs_has_inline_data(inode)) {
ipage = get_node_page(sbi, inode->i_ino);
f2fs_bug_on(IS_ERR(ipage));
+ f2fs_wait_on_page_writeback(ipage, NODE);
zero_user_segment(ipage, INLINE_DATA_OFFSET,
INLINE_DATA_OFFSET + MAX_INLINE_DATA);
clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index ee829d3..adc622c 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -12,6 +12,7 @@
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
+#include <linux/bitops.h>
#include "f2fs.h"
#include "node.h"
@@ -21,20 +22,20 @@
void f2fs_set_inode_flags(struct inode *inode)
{
unsigned int flags = F2FS_I(inode)->i_flags;
-
- inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE |
- S_NOATIME | S_DIRSYNC);
+ unsigned int new_fl = 0;
if (flags & FS_SYNC_FL)
- inode->i_flags |= S_SYNC;
+ new_fl |= S_SYNC;
if (flags & FS_APPEND_FL)
- inode->i_flags |= S_APPEND;
+ new_fl |= S_APPEND;
if (flags & FS_IMMUTABLE_FL)
- inode->i_flags |= S_IMMUTABLE;
+ new_fl |= S_IMMUTABLE;
if (flags & FS_NOATIME_FL)
- inode->i_flags |= S_NOATIME;
+ new_fl |= S_NOATIME;
if (flags & FS_DIRSYNC_FL)
- inode->i_flags |= S_DIRSYNC;
+ new_fl |= S_DIRSYNC;
+ set_mask_bits(&inode->i_flags,
+ S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl);
}
static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -294,4 +295,5 @@
sb_end_intwrite(inode->i_sb);
no_delete:
clear_inode(inode);
+ invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index a9409d1..9138c32 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -41,18 +41,9 @@
}
f2fs_unlock_op(sbi);
- inode->i_uid = current_fsuid();
-
- if (dir->i_mode & S_ISGID) {
- inode->i_gid = dir->i_gid;
- if (S_ISDIR(mode))
- mode |= S_ISGID;
- } else {
- inode->i_gid = current_fsgid();
- }
+ inode_init_owner(inode, dir, mode);
inode->i_ino = ino;
- inode->i_mode = mode;
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_generation = sbi->s_next_generation++;
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 57caa6e..9dfb9a0 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -26,20 +26,26 @@
static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
-static inline bool available_free_memory(struct f2fs_nm_info *nm_i, int type)
+bool available_free_memory(struct f2fs_sb_info *sbi, int type)
{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
struct sysinfo val;
unsigned long mem_size = 0;
+ bool res = false;
si_meminfo(&val);
- if (type == FREE_NIDS)
- mem_size = nm_i->fcnt * sizeof(struct free_nid);
- else if (type == NAT_ENTRIES)
- mem_size += nm_i->nat_cnt * sizeof(struct nat_entry);
- mem_size >>= 12;
-
- /* give 50:50 memory for free nids and nat caches respectively */
- return (mem_size < ((val.totalram * nm_i->ram_thresh) >> 11));
+ /* give 25%, 25%, 50% memory for each components respectively */
+ if (type == FREE_NIDS) {
+ mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12;
+ res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
+ } else if (type == NAT_ENTRIES) {
+ mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12;
+ res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
+ } else if (type == DIRTY_DENTS) {
+ mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
+ res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1);
+ }
+ return res;
}
static void clear_node_page_dirty(struct page *page)
@@ -147,6 +153,18 @@
return fsync_done;
}
+void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct nat_entry *e;
+
+ write_lock(&nm_i->nat_tree_lock);
+ e = __lookup_nat_cache(nm_i, nid);
+ if (e)
+ e->fsync_done = false;
+ write_unlock(&nm_i->nat_tree_lock);
+}
+
static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
{
struct nat_entry *new;
@@ -179,9 +197,7 @@
write_unlock(&nm_i->nat_tree_lock);
goto retry;
}
- nat_set_blkaddr(e, le32_to_cpu(ne->block_addr));
- nat_set_ino(e, le32_to_cpu(ne->ino));
- nat_set_version(e, ne->version);
+ node_info_from_raw_nat(&e->ni, ne);
}
write_unlock(&nm_i->nat_tree_lock);
}
@@ -243,7 +259,7 @@
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- if (available_free_memory(nm_i, NAT_ENTRIES))
+ if (available_free_memory(sbi, NAT_ENTRIES))
return 0;
write_lock(&nm_i->nat_tree_lock);
@@ -849,8 +865,7 @@
if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
return ERR_PTR(-EPERM);
- page = grab_cache_page_write_begin(NODE_MAPPING(sbi),
- dn->nid, AOP_FLAG_NOFS);
+ page = grab_cache_page(NODE_MAPPING(sbi), dn->nid);
if (!page)
return ERR_PTR(-ENOMEM);
@@ -867,6 +882,7 @@
new_ni.ino = dn->inode->i_ino;
set_node_addr(sbi, &new_ni, NEW_ADDR, false);
+ f2fs_wait_on_page_writeback(page, NODE);
fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
set_cold_node(dn->inode, page);
SetPageUptodate(page);
@@ -946,8 +962,7 @@
struct page *page;
int err;
repeat:
- page = grab_cache_page_write_begin(NODE_MAPPING(sbi),
- nid, AOP_FLAG_NOFS);
+ page = grab_cache_page(NODE_MAPPING(sbi), nid);
if (!page)
return ERR_PTR(-ENOMEM);
@@ -1194,6 +1209,8 @@
.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
};
+ trace_f2fs_writepage(page, NODE);
+
if (unlikely(sbi->por_doing))
goto redirty_out;
@@ -1225,10 +1242,7 @@
return 0;
redirty_out:
- dec_page_count(sbi, F2FS_DIRTY_NODES);
- wbc->pages_skipped++;
- account_page_redirty(page);
- set_page_dirty(page);
+ redirty_page_for_writepage(wbc, page);
return AOP_WRITEPAGE_ACTIVATE;
}
@@ -1238,6 +1252,8 @@
struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
long diff;
+ trace_f2fs_writepages(mapping->host, wbc, NODE);
+
/* balancing f2fs's metadata in background */
f2fs_balance_fs_bg(sbi);
@@ -1313,13 +1329,14 @@
radix_tree_delete(&nm_i->free_nid_root, i->nid);
}
-static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
+static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
struct nat_entry *ne;
bool allocated = false;
- if (!available_free_memory(nm_i, FREE_NIDS))
+ if (!available_free_memory(sbi, FREE_NIDS))
return -1;
/* 0 nid should not be used */
@@ -1372,9 +1389,10 @@
kmem_cache_free(free_nid_slab, i);
}
-static void scan_nat_page(struct f2fs_nm_info *nm_i,
+static void scan_nat_page(struct f2fs_sb_info *sbi,
struct page *nat_page, nid_t start_nid)
{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
struct f2fs_nat_block *nat_blk = page_address(nat_page);
block_t blk_addr;
int i;
@@ -1389,7 +1407,7 @@
blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
f2fs_bug_on(blk_addr == NEW_ADDR);
if (blk_addr == NULL_ADDR) {
- if (add_free_nid(nm_i, start_nid, true) < 0)
+ if (add_free_nid(sbi, start_nid, true) < 0)
break;
}
}
@@ -1413,7 +1431,7 @@
while (1) {
struct page *page = get_current_nat_page(sbi, nid);
- scan_nat_page(nm_i, page, nid);
+ scan_nat_page(sbi, page, nid);
f2fs_put_page(page, 1);
nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
@@ -1433,7 +1451,7 @@
block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr);
nid = le32_to_cpu(nid_in_journal(sum, i));
if (addr == NULL_ADDR)
- add_free_nid(nm_i, nid, true);
+ add_free_nid(sbi, nid, true);
else
remove_free_nid(nm_i, nid);
}
@@ -1450,7 +1468,7 @@
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i = NULL;
retry:
- if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid))
+ if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids))
return false;
spin_lock(&nm_i->free_nid_list_lock);
@@ -1510,7 +1528,7 @@
spin_lock(&nm_i->free_nid_list_lock);
i = __lookup_free_nid_list(nm_i, nid);
f2fs_bug_on(!i || i->state != NID_ALLOC);
- if (!available_free_memory(nm_i, FREE_NIDS)) {
+ if (!available_free_memory(sbi, FREE_NIDS)) {
__del_from_free_nid_list(nm_i, i);
need_free = true;
} else {
@@ -1532,7 +1550,7 @@
clear_node_page_dirty(page);
}
-void recover_inline_xattr(struct inode *inode, struct page *page)
+static void recover_inline_xattr(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
void *src_addr, *dst_addr;
@@ -1557,6 +1575,7 @@
src_addr = inline_xattr_addr(page);
inline_size = inline_xattr_size(inode);
+ f2fs_wait_on_page_writeback(ipage, NODE);
memcpy(dst_addr, src_addr, inline_size);
update_inode(inode, ipage);
@@ -1612,6 +1631,11 @@
struct node_info old_ni, new_ni;
struct page *ipage;
+ get_node_info(sbi, ino, &old_ni);
+
+ if (unlikely(old_ni.blk_addr != NULL_ADDR))
+ return -EINVAL;
+
ipage = grab_cache_page(NODE_MAPPING(sbi), ino);
if (!ipage)
return -ENOMEM;
@@ -1619,7 +1643,6 @@
/* Should not use this inode from free nid list */
remove_free_nid(NM_I(sbi), ino);
- get_node_info(sbi, ino, &old_ni);
SetPageUptodate(ipage);
fill_node_footer(ipage, ino, ino, 0, true);
@@ -1645,35 +1668,29 @@
/*
* ra_sum_pages() merge contiguous pages into one bio and submit.
- * these pre-readed pages are linked in pages list.
+ * these pre-readed pages are alloced in bd_inode's mapping tree.
*/
-static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages,
+static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages,
int start, int nrpages)
{
- struct page *page;
- int page_idx = start;
+ struct inode *inode = sbi->sb->s_bdev->bd_inode;
+ struct address_space *mapping = inode->i_mapping;
+ int i, page_idx = start;
struct f2fs_io_info fio = {
.type = META,
.rw = READ_SYNC | REQ_META | REQ_PRIO
};
- for (; page_idx < start + nrpages; page_idx++) {
- /* alloc temporal page for read node summary info*/
- page = alloc_page(GFP_F2FS_ZERO);
- if (!page)
+ for (i = 0; page_idx < start + nrpages; page_idx++, i++) {
+ /* alloc page in bd_inode for reading node summary info */
+ pages[i] = grab_cache_page(mapping, page_idx);
+ if (!pages[i])
break;
-
- lock_page(page);
- page->index = page_idx;
- list_add_tail(&page->lru, pages);
+ f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio);
}
- list_for_each_entry(page, pages, lru)
- f2fs_submit_page_mbio(sbi, page, page->index, &fio);
-
f2fs_submit_merged_bio(sbi, META, READ);
-
- return page_idx - start;
+ return i;
}
int restore_node_summary(struct f2fs_sb_info *sbi,
@@ -1681,11 +1698,11 @@
{
struct f2fs_node *rn;
struct f2fs_summary *sum_entry;
- struct page *page, *tmp;
+ struct inode *inode = sbi->sb->s_bdev->bd_inode;
block_t addr;
int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
- int i, last_offset, nrpages, err = 0;
- LIST_HEAD(page_list);
+ struct page *pages[bio_blocks];
+ int i, idx, last_offset, nrpages, err = 0;
/* scan the node segment */
last_offset = sbi->blocks_per_seg;
@@ -1696,29 +1713,31 @@
nrpages = min(last_offset - i, bio_blocks);
/* read ahead node pages */
- nrpages = ra_sum_pages(sbi, &page_list, addr, nrpages);
+ nrpages = ra_sum_pages(sbi, pages, addr, nrpages);
if (!nrpages)
return -ENOMEM;
- list_for_each_entry_safe(page, tmp, &page_list, lru) {
+ for (idx = 0; idx < nrpages; idx++) {
if (err)
goto skip;
- lock_page(page);
- if (unlikely(!PageUptodate(page))) {
+ lock_page(pages[idx]);
+ if (unlikely(!PageUptodate(pages[idx]))) {
err = -EIO;
} else {
- rn = F2FS_NODE(page);
+ rn = F2FS_NODE(pages[idx]);
sum_entry->nid = rn->footer.nid;
sum_entry->version = 0;
sum_entry->ofs_in_node = 0;
sum_entry++;
}
- unlock_page(page);
+ unlock_page(pages[idx]);
skip:
- list_del(&page->lru);
- __free_pages(page, 0);
+ page_cache_release(pages[idx]);
}
+
+ invalidate_mapping_pages(inode->i_mapping, addr,
+ addr + nrpages);
}
return err;
}
@@ -1756,9 +1775,7 @@
write_unlock(&nm_i->nat_tree_lock);
goto retry;
}
- nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr));
- nat_set_ino(ne, le32_to_cpu(raw_ne.ino));
- nat_set_version(ne, raw_ne.version);
+ node_info_from_raw_nat(&ne->ni, &raw_ne);
__set_nat_cache_dirty(nm_i, ne);
write_unlock(&nm_i->nat_tree_lock);
}
@@ -1791,7 +1808,6 @@
nid_t nid;
struct f2fs_nat_entry raw_ne;
int offset = -1;
- block_t new_blkaddr;
if (nat_get_blkaddr(ne) == NEW_ADDR)
continue;
@@ -1827,11 +1843,7 @@
f2fs_bug_on(!nat_blk);
raw_ne = nat_blk->entries[nid - start_nid];
flush_now:
- new_blkaddr = nat_get_blkaddr(ne);
-
- raw_ne.ino = cpu_to_le32(nat_get_ino(ne));
- raw_ne.block_addr = cpu_to_le32(new_blkaddr);
- raw_ne.version = nat_get_version(ne);
+ raw_nat_from_node_info(&raw_ne, &ne->ni);
if (offset < 0) {
nat_blk->entries[nid - start_nid] = raw_ne;
@@ -1841,7 +1853,7 @@
}
if (nat_get_blkaddr(ne) == NULL_ADDR &&
- add_free_nid(NM_I(sbi), nid, false) <= 0) {
+ add_free_nid(sbi, nid, false) <= 0) {
write_lock(&nm_i->nat_tree_lock);
__del_from_nat_cache(nm_i, ne);
write_unlock(&nm_i->nat_tree_lock);
@@ -1869,8 +1881,10 @@
nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
+ nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
+
/* not used nids: 0, node, meta, (and root counted as valid node) */
- nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks - 3;
+ nm_i->available_nids = nm_i->max_nid - 3;
nm_i->fcnt = 0;
nm_i->nat_cnt = 0;
nm_i->ram_thresh = DEF_RAM_THRESHOLD;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 5decc1a..7281112 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -59,12 +59,12 @@
do { \
ne->checkpointed = false; \
list_move_tail(&ne->list, &nm_i->dirty_nat_entries); \
- } while (0);
+ } while (0)
#define __clear_nat_cache_dirty(nm_i, ne) \
do { \
ne->checkpointed = true; \
list_move_tail(&ne->list, &nm_i->nat_entries); \
- } while (0);
+ } while (0)
#define inc_node_version(version) (++version)
static inline void node_info_from_raw_nat(struct node_info *ni,
@@ -75,9 +75,18 @@
ni->version = raw_ne->version;
}
-enum nid_type {
+static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
+ struct node_info *ni)
+{
+ raw_ne->ino = cpu_to_le32(ni->ino);
+ raw_ne->block_addr = cpu_to_le32(ni->blk_addr);
+ raw_ne->version = ni->version;
+}
+
+enum mem_type {
FREE_NIDS, /* indicates the free nid list */
- NAT_ENTRIES /* indicates the cached nat entry */
+ NAT_ENTRIES, /* indicates the cached nat entry */
+ DIRTY_DENTS /* indicates dirty dentry pages */
};
/*
@@ -263,7 +272,7 @@
{
struct f2fs_node *rn = F2FS_NODE(p);
- wait_on_page_writeback(p);
+ f2fs_wait_on_page_writeback(p, NODE);
if (i)
rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index b1ae89f..a112368 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -46,15 +46,10 @@
struct inode *dir, *einode;
int err = 0;
- dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino);
- if (!dir) {
- dir = f2fs_iget(inode->i_sb, pino);
- if (IS_ERR(dir)) {
- err = PTR_ERR(dir);
- goto out;
- }
- set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
- add_dirty_dir_inode(dir);
+ dir = f2fs_iget(inode->i_sb, pino);
+ if (IS_ERR(dir)) {
+ err = PTR_ERR(dir);
+ goto out;
}
name.len = le32_to_cpu(raw_inode->i_namelen);
@@ -63,7 +58,7 @@
if (unlikely(name.len > F2FS_NAME_LEN)) {
WARN_ON(1);
err = -ENAMETOOLONG;
- goto out;
+ goto out_err;
}
retry:
de = f2fs_find_entry(dir, &name, &page);
@@ -73,7 +68,8 @@
einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
if (IS_ERR(einode)) {
WARN_ON(1);
- if (PTR_ERR(einode) == -ENOENT)
+ err = PTR_ERR(einode);
+ if (err == -ENOENT)
err = -EEXIST;
goto out_unmap_put;
}
@@ -87,11 +83,23 @@
goto retry;
}
err = __f2fs_add_link(dir, &name, inode);
+ if (err)
+ goto out_err;
+
+ if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
+ iput(dir);
+ } else {
+ add_dirty_dir_inode(dir);
+ set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
+ }
+
goto out;
out_unmap_put:
kunmap(page);
f2fs_put_page(page, 0);
+out_err:
+ iput(dir);
out:
f2fs_msg(inode->i_sb, KERN_NOTICE,
"%s: ino = %x, name = %s, dir = %lx, err = %d",
@@ -299,10 +307,7 @@
goto out;
start = start_bidx_of_node(ofs_of_node(page), fi);
- if (IS_INODE(page))
- end = start + ADDRS_PER_INODE(fi);
- else
- end = start + ADDRS_PER_BLOCK;
+ end = start + ADDRS_PER_PAGE(page, fi);
f2fs_lock_op(sbi);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 085f548..f25f0e0 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -25,7 +25,6 @@
#define __reverse_ffz(x) __reverse_ffs(~(x))
static struct kmem_cache *discard_entry_slab;
-static struct kmem_cache *flush_cmd_slab;
/*
* __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
@@ -200,20 +199,20 @@
static int issue_flush_thread(void *data)
{
struct f2fs_sb_info *sbi = data;
- struct f2fs_sm_info *sm_i = SM_I(sbi);
- wait_queue_head_t *q = &sm_i->flush_wait_queue;
+ struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+ wait_queue_head_t *q = &fcc->flush_wait_queue;
repeat:
if (kthread_should_stop())
return 0;
- spin_lock(&sm_i->issue_lock);
- if (sm_i->issue_list) {
- sm_i->dispatch_list = sm_i->issue_list;
- sm_i->issue_list = sm_i->issue_tail = NULL;
+ spin_lock(&fcc->issue_lock);
+ if (fcc->issue_list) {
+ fcc->dispatch_list = fcc->issue_list;
+ fcc->issue_list = fcc->issue_tail = NULL;
}
- spin_unlock(&sm_i->issue_lock);
+ spin_unlock(&fcc->issue_lock);
- if (sm_i->dispatch_list) {
+ if (fcc->dispatch_list) {
struct bio *bio = bio_alloc(GFP_NOIO, 0);
struct flush_cmd *cmd, *next;
int ret;
@@ -221,47 +220,79 @@
bio->bi_bdev = sbi->sb->s_bdev;
ret = submit_bio_wait(WRITE_FLUSH, bio);
- for (cmd = sm_i->dispatch_list; cmd; cmd = next) {
+ for (cmd = fcc->dispatch_list; cmd; cmd = next) {
cmd->ret = ret;
next = cmd->next;
complete(&cmd->wait);
}
- sm_i->dispatch_list = NULL;
+ bio_put(bio);
+ fcc->dispatch_list = NULL;
}
- wait_event_interruptible(*q, kthread_should_stop() || sm_i->issue_list);
+ wait_event_interruptible(*q,
+ kthread_should_stop() || fcc->issue_list);
goto repeat;
}
int f2fs_issue_flush(struct f2fs_sb_info *sbi)
{
- struct f2fs_sm_info *sm_i = SM_I(sbi);
- struct flush_cmd *cmd;
- int ret;
+ struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
+ struct flush_cmd cmd;
if (!test_opt(sbi, FLUSH_MERGE))
return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
- cmd = f2fs_kmem_cache_alloc(flush_cmd_slab, GFP_ATOMIC);
- cmd->next = NULL;
- cmd->ret = 0;
- init_completion(&cmd->wait);
+ init_completion(&cmd.wait);
+ cmd.next = NULL;
- spin_lock(&sm_i->issue_lock);
- if (sm_i->issue_list)
- sm_i->issue_tail->next = cmd;
+ spin_lock(&fcc->issue_lock);
+ if (fcc->issue_list)
+ fcc->issue_tail->next = &cmd;
else
- sm_i->issue_list = cmd;
- sm_i->issue_tail = cmd;
- spin_unlock(&sm_i->issue_lock);
+ fcc->issue_list = &cmd;
+ fcc->issue_tail = &cmd;
+ spin_unlock(&fcc->issue_lock);
- if (!sm_i->dispatch_list)
- wake_up(&sm_i->flush_wait_queue);
+ if (!fcc->dispatch_list)
+ wake_up(&fcc->flush_wait_queue);
- wait_for_completion(&cmd->wait);
- ret = cmd->ret;
- kmem_cache_free(flush_cmd_slab, cmd);
- return ret;
+ wait_for_completion(&cmd.wait);
+
+ return cmd.ret;
+}
+
+int create_flush_cmd_control(struct f2fs_sb_info *sbi)
+{
+ dev_t dev = sbi->sb->s_bdev->bd_dev;
+ struct flush_cmd_control *fcc;
+ int err = 0;
+
+ fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
+ if (!fcc)
+ return -ENOMEM;
+ spin_lock_init(&fcc->issue_lock);
+ init_waitqueue_head(&fcc->flush_wait_queue);
+ fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
+ "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
+ if (IS_ERR(fcc->f2fs_issue_flush)) {
+ err = PTR_ERR(fcc->f2fs_issue_flush);
+ kfree(fcc);
+ return err;
+ }
+ sbi->sm_info->cmd_control_info = fcc;
+
+ return err;
+}
+
+void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
+{
+ struct flush_cmd_control *fcc =
+ sbi->sm_info->cmd_control_info;
+
+ if (fcc && fcc->f2fs_issue_flush)
+ kthread_stop(fcc->f2fs_issue_flush);
+ kfree(fcc);
+ sbi->sm_info->cmd_control_info = NULL;
}
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
@@ -336,13 +367,26 @@
mutex_unlock(&dirty_i->seglist_lock);
}
-static void f2fs_issue_discard(struct f2fs_sb_info *sbi,
+static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
block_t blkstart, block_t blklen)
{
sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart);
sector_t len = SECTOR_FROM_BLOCK(sbi, blklen);
- blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
+ return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
+}
+
+void discard_next_dnode(struct f2fs_sb_info *sbi)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
+ block_t blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+
+ if (f2fs_issue_discard(sbi, blkaddr, 1)) {
+ struct page *page = grab_meta_page(sbi, blkaddr);
+ /* zero-filled page */
+ set_page_dirty(page);
+ f2fs_put_page(page, 1);
+ }
}
static void add_discard_addrs(struct f2fs_sb_info *sbi,
@@ -1832,7 +1876,6 @@
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
- dev_t dev = sbi->sb->s_bdev->bd_dev;
struct f2fs_sm_info *sm_info;
int err;
@@ -1860,14 +1903,10 @@
sm_info->nr_discards = 0;
sm_info->max_discards = 0;
- if (test_opt(sbi, FLUSH_MERGE)) {
- spin_lock_init(&sm_info->issue_lock);
- init_waitqueue_head(&sm_info->flush_wait_queue);
-
- sm_info->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
- "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
- if (IS_ERR(sm_info->f2fs_issue_flush))
- return PTR_ERR(sm_info->f2fs_issue_flush);
+ if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
+ err = create_flush_cmd_control(sbi);
+ if (err)
+ return err;
}
err = build_sit_info(sbi);
@@ -1976,10 +2015,10 @@
void destroy_segment_manager(struct f2fs_sb_info *sbi)
{
struct f2fs_sm_info *sm_info = SM_I(sbi);
+
if (!sm_info)
return;
- if (sm_info->f2fs_issue_flush)
- kthread_stop(sm_info->f2fs_issue_flush);
+ destroy_flush_cmd_control(sbi);
destroy_dirty_segmap(sbi);
destroy_curseg(sbi);
destroy_free_segmap(sbi);
@@ -1994,17 +2033,10 @@
sizeof(struct discard_entry));
if (!discard_entry_slab)
return -ENOMEM;
- flush_cmd_slab = f2fs_kmem_cache_create("flush_command",
- sizeof(struct flush_cmd));
- if (!flush_cmd_slab) {
- kmem_cache_destroy(discard_entry_slab);
- return -ENOMEM;
- }
return 0;
}
void destroy_segment_manager_caches(void)
{
kmem_cache_destroy(discard_entry_slab);
- kmem_cache_destroy(flush_cmd_slab);
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index c756923..b2b1863 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -514,7 +514,7 @@
{
struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);
- if (!(root->d_sb->s_flags & MS_RDONLY) && test_opt(sbi, BG_GC))
+ if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC))
seq_printf(seq, ",background_gc=%s", "on");
else
seq_printf(seq, ",background_gc=%s", "off");
@@ -542,7 +542,7 @@
seq_puts(seq, ",disable_ext_identify");
if (test_opt(sbi, INLINE_DATA))
seq_puts(seq, ",inline_data");
- if (test_opt(sbi, FLUSH_MERGE))
+ if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
seq_puts(seq, ",flush_merge");
seq_printf(seq, ",active_logs=%u", sbi->active_logs);
@@ -594,6 +594,8 @@
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct f2fs_mount_info org_mount_opt;
int err, active_logs;
+ bool need_restart_gc = false;
+ bool need_stop_gc = false;
sync_filesystem(sb);
@@ -611,7 +613,7 @@
/*
* Previous and new state of filesystem is RO,
- * so no point in checking GC conditions.
+ * so skip checking GC and FLUSH_MERGE conditions.
*/
if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY))
goto skip;
@@ -625,18 +627,40 @@
if (sbi->gc_thread) {
stop_gc_thread(sbi);
f2fs_sync_fs(sb, 1);
+ need_restart_gc = true;
}
} else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) {
err = start_gc_thread(sbi);
if (err)
goto restore_opts;
+ need_stop_gc = true;
+ }
+
+ /*
+ * We stop issue flush thread if FS is mounted as RO
+ * or if flush_merge is not passed in mount option.
+ */
+ if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
+ destroy_flush_cmd_control(sbi);
+ } else if (test_opt(sbi, FLUSH_MERGE) &&
+ !sbi->sm_info->cmd_control_info) {
+ err = create_flush_cmd_control(sbi);
+ if (err)
+ goto restore_gc;
}
skip:
/* Update the POSIXACL Flag */
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
(test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
return 0;
-
+restore_gc:
+ if (need_restart_gc) {
+ if (start_gc_thread(sbi))
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "background gc thread is stop");
+ } else if (need_stop_gc) {
+ stop_gc_thread(sbi);
+ }
restore_opts:
sbi->mount_opt = org_mount_opt;
sbi->active_logs = active_logs;
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 503c245..8bea941 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -26,7 +26,7 @@
#include "xattr.h"
static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list,
- size_t list_size, const char *name, size_t name_len, int type)
+ size_t list_size, const char *name, size_t len, int type)
{
struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
int total_len, prefix_len = 0;
@@ -53,11 +53,11 @@
return -EINVAL;
}
- total_len = prefix_len + name_len + 1;
+ total_len = prefix_len + len + 1;
if (list && total_len <= list_size) {
memcpy(list, prefix, prefix_len);
- memcpy(list + prefix_len, name, name_len);
- list[prefix_len + name_len] = '\0';
+ memcpy(list + prefix_len, name, len);
+ list[prefix_len + len] = '\0';
}
return total_len;
}
@@ -108,11 +108,12 @@
if (strcmp(name, "") == 0)
return -EINVAL;
- return f2fs_setxattr(dentry->d_inode, type, name, value, size, NULL);
+ return f2fs_setxattr(dentry->d_inode, type, name,
+ value, size, NULL, flags);
}
static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list,
- size_t list_size, const char *name, size_t name_len, int type)
+ size_t list_size, const char *name, size_t len, int type)
{
const char *xname = F2FS_SYSTEM_ADVISE_PREFIX;
size_t size;
@@ -155,9 +156,6 @@
}
#ifdef CONFIG_F2FS_FS_SECURITY
-static int __f2fs_setxattr(struct inode *inode, int name_index,
- const char *name, const void *value, size_t value_len,
- struct page *ipage);
static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
void *page)
{
@@ -165,9 +163,9 @@
int err = 0;
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
- err = __f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY,
+ err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY,
xattr->name, xattr->value,
- xattr->value_len, (struct page *)page);
+ xattr->value_len, (struct page *)page, 0);
if (err < 0)
break;
}
@@ -241,26 +239,26 @@
NULL,
};
-static inline const struct xattr_handler *f2fs_xattr_handler(int name_index)
+static inline const struct xattr_handler *f2fs_xattr_handler(int index)
{
const struct xattr_handler *handler = NULL;
- if (name_index > 0 && name_index < ARRAY_SIZE(f2fs_xattr_handler_map))
- handler = f2fs_xattr_handler_map[name_index];
+ if (index > 0 && index < ARRAY_SIZE(f2fs_xattr_handler_map))
+ handler = f2fs_xattr_handler_map[index];
return handler;
}
-static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int name_index,
- size_t name_len, const char *name)
+static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
+ size_t len, const char *name)
{
struct f2fs_xattr_entry *entry;
list_for_each_xattr(entry, base_addr) {
- if (entry->e_name_index != name_index)
+ if (entry->e_name_index != index)
continue;
- if (entry->e_name_len != name_len)
+ if (entry->e_name_len != len)
continue;
- if (!memcmp(entry->e_name, name, name_len))
+ if (!memcmp(entry->e_name, name, len))
break;
}
return entry;
@@ -347,6 +345,7 @@
if (ipage) {
inline_addr = inline_xattr_addr(ipage);
+ f2fs_wait_on_page_writeback(ipage, NODE);
} else {
page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(page)) {
@@ -354,6 +353,7 @@
return PTR_ERR(page);
}
inline_addr = inline_xattr_addr(page);
+ f2fs_wait_on_page_writeback(page, NODE);
}
memcpy(inline_addr, txattr_addr, inline_size);
f2fs_put_page(page, 1);
@@ -374,6 +374,7 @@
return PTR_ERR(xpage);
}
f2fs_bug_on(new_nid);
+ f2fs_wait_on_page_writeback(xpage, NODE);
} else {
struct dnode_of_data dn;
set_new_dnode(&dn, inode, NULL, NULL, new_nid);
@@ -396,42 +397,43 @@
return 0;
}
-int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
+int f2fs_getxattr(struct inode *inode, int index, const char *name,
void *buffer, size_t buffer_size)
{
struct f2fs_xattr_entry *entry;
void *base_addr;
int error = 0;
- size_t value_len, name_len;
+ size_t size, len;
if (name == NULL)
return -EINVAL;
- name_len = strlen(name);
- if (name_len > F2FS_NAME_LEN)
+
+ len = strlen(name);
+ if (len > F2FS_NAME_LEN)
return -ERANGE;
base_addr = read_all_xattrs(inode, NULL);
if (!base_addr)
return -ENOMEM;
- entry = __find_xattr(base_addr, name_index, name_len, name);
+ entry = __find_xattr(base_addr, index, len, name);
if (IS_XATTR_LAST_ENTRY(entry)) {
error = -ENODATA;
goto cleanup;
}
- value_len = le16_to_cpu(entry->e_value_size);
+ size = le16_to_cpu(entry->e_value_size);
- if (buffer && value_len > buffer_size) {
+ if (buffer && size > buffer_size) {
error = -ERANGE;
goto cleanup;
}
if (buffer) {
char *pval = entry->e_name + entry->e_name_len;
- memcpy(buffer, pval, value_len);
+ memcpy(buffer, pval, size);
}
- error = value_len;
+ error = size;
cleanup:
kzfree(base_addr);
@@ -475,15 +477,15 @@
return error;
}
-static int __f2fs_setxattr(struct inode *inode, int name_index,
- const char *name, const void *value, size_t value_len,
- struct page *ipage)
+static int __f2fs_setxattr(struct inode *inode, int index,
+ const char *name, const void *value, size_t size,
+ struct page *ipage, int flags)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_xattr_entry *here, *last;
void *base_addr;
int found, newsize;
- size_t name_len;
+ size_t len;
__u32 new_hsize;
int error = -ENOMEM;
@@ -491,11 +493,11 @@
return -EINVAL;
if (value == NULL)
- value_len = 0;
+ size = 0;
- name_len = strlen(name);
+ len = strlen(name);
- if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode))
+ if (len > F2FS_NAME_LEN || size > MAX_VALUE_LEN(inode))
return -ERANGE;
base_addr = read_all_xattrs(inode, ipage);
@@ -503,16 +505,23 @@
goto exit;
/* find entry with wanted name. */
- here = __find_xattr(base_addr, name_index, name_len, name);
+ here = __find_xattr(base_addr, index, len, name);
found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1;
- last = here;
+ if ((flags & XATTR_REPLACE) && !found) {
+ error = -ENODATA;
+ goto exit;
+ } else if ((flags & XATTR_CREATE) && found) {
+ error = -EEXIST;
+ goto exit;
+ }
+
+ last = here;
while (!IS_XATTR_LAST_ENTRY(last))
last = XATTR_NEXT_ENTRY(last);
- newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) +
- name_len + value_len);
+ newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + len + size);
/* 1. Check space */
if (value) {
@@ -555,12 +564,12 @@
* We just write new entry.
*/
memset(last, 0, newsize);
- last->e_name_index = name_index;
- last->e_name_len = name_len;
- memcpy(last->e_name, name, name_len);
- pval = last->e_name + name_len;
- memcpy(pval, value, value_len);
- last->e_value_size = cpu_to_le16(value_len);
+ last->e_name_index = index;
+ last->e_name_len = len;
+ memcpy(last->e_name, name, len);
+ pval = last->e_name + len;
+ memcpy(pval, value, size);
+ last->e_value_size = cpu_to_le16(size);
new_hsize += newsize;
}
@@ -583,18 +592,23 @@
return error;
}
-int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
- const void *value, size_t value_len, struct page *ipage)
+int f2fs_setxattr(struct inode *inode, int index, const char *name,
+ const void *value, size_t size,
+ struct page *ipage, int flags)
{
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
int err;
+ /* this case is only from init_inode_metadata */
+ if (ipage)
+ return __f2fs_setxattr(inode, index, name, value,
+ size, ipage, flags);
f2fs_balance_fs(sbi);
f2fs_lock_op(sbi);
/* protect xattr_ver */
down_write(&F2FS_I(inode)->i_sem);
- err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage);
+ err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags);
up_write(&F2FS_I(inode)->i_sem);
f2fs_unlock_op(sbi);
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index b21d9eb..34ab7db 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -114,18 +114,18 @@
extern const struct xattr_handler *f2fs_xattr_handlers[];
extern int f2fs_setxattr(struct inode *, int, const char *,
- const void *, size_t, struct page *);
+ const void *, size_t, struct page *, int);
extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t);
extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t);
#else
#define f2fs_xattr_handlers NULL
-static inline int f2fs_setxattr(struct inode *inode, int name_index,
- const char *name, const void *value, size_t value_len)
+static inline int f2fs_setxattr(struct inode *inode, int index,
+ const char *name, const void *value, size_t size, int flags)
{
return -EOPNOTSUPP;
}
-static inline int f2fs_getxattr(struct inode *inode, int name_index,
+static inline int f2fs_getxattr(struct inode *inode, int index,
const char *name, void *buffer, size_t buffer_size)
{
return -EOPNOTSUPP;
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index df53e17..6ff0b0b 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -19,6 +19,7 @@
#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* 4KB: F2FS_BLKSIZE */
#define F2FS_BLKSIZE 4096 /* support only 4KB block */
#define F2FS_MAX_EXTENSION 64 /* # of extension entries */
+#define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE)
#define NULL_ADDR ((block_t)0) /* used as block_t addresses */
#define NEW_ADDR ((block_t)-1) /* used as block_t addresses */
@@ -75,6 +76,7 @@
__le16 volume_name[512]; /* volume name */
__le32 extension_count; /* # of extensions below */
__u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */
+ __le32 cp_payload;
} __packed;
/*
@@ -146,6 +148,9 @@
#define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */
#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */
+#define ADDRS_PER_PAGE(page, fi) \
+ (IS_INODE(page) ? ADDRS_PER_INODE(fi) : ADDRS_PER_BLOCK)
+
#define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1)
#define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2)
#define NODE_IND1_BLOCK (DEF_ADDRS_PER_INODE + 3)
@@ -391,6 +396,9 @@
/* MAX level for dir lookup */
#define MAX_DIR_HASH_DEPTH 63
+/* MAX buckets in one level of dir */
+#define MAX_DIR_BUCKETS (1 << ((MAX_DIR_HASH_DEPTH / 2) - 1))
+
#define SIZE_OF_DIR_ENTRY 11 /* by byte */
#define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \
BITS_PER_BYTE)
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 67f38fa..b983990 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -659,6 +659,66 @@
TP_CONDITION(bio)
);
+TRACE_EVENT(f2fs_write_begin,
+
+ TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+ unsigned int flags),
+
+ TP_ARGS(inode, pos, len, flags),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(loff_t, pos)
+ __field(unsigned int, len)
+ __field(unsigned int, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->pos = pos;
+ __entry->len = len;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu, pos = %llu, len = %u, flags = %u",
+ show_dev_ino(__entry),
+ (unsigned long long)__entry->pos,
+ __entry->len,
+ __entry->flags)
+);
+
+TRACE_EVENT(f2fs_write_end,
+
+ TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+ unsigned int copied),
+
+ TP_ARGS(inode, pos, len, copied),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(loff_t, pos)
+ __field(unsigned int, len)
+ __field(unsigned int, copied)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->pos = pos;
+ __entry->len = len;
+ __entry->copied = copied;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu, pos = %llu, len = %u, copied = %u",
+ show_dev_ino(__entry),
+ (unsigned long long)__entry->pos,
+ __entry->len,
+ __entry->copied)
+);
+
DECLARE_EVENT_CLASS(f2fs__page,
TP_PROTO(struct page *page, int type),
@@ -672,6 +732,7 @@
__field(int, dir)
__field(pgoff_t, index)
__field(int, dirty)
+ __field(int, uptodate)
),
TP_fast_assign(
@@ -681,14 +742,31 @@
__entry->dir = S_ISDIR(page->mapping->host->i_mode);
__entry->index = page->index;
__entry->dirty = PageDirty(page);
+ __entry->uptodate = PageUptodate(page);
),
- TP_printk("dev = (%d,%d), ino = %lu, %s, %s, index = %lu, dirty = %d",
+ TP_printk("dev = (%d,%d), ino = %lu, %s, %s, index = %lu, "
+ "dirty = %d, uptodate = %d",
show_dev_ino(__entry),
show_block_type(__entry->type),
show_file_type(__entry->dir),
(unsigned long)__entry->index,
- __entry->dirty)
+ __entry->dirty,
+ __entry->uptodate)
+);
+
+DEFINE_EVENT(f2fs__page, f2fs_writepage,
+
+ TP_PROTO(struct page *page, int type),
+
+ TP_ARGS(page, type)
+);
+
+DEFINE_EVENT(f2fs__page, f2fs_readpage,
+
+ TP_PROTO(struct page *page, int type),
+
+ TP_ARGS(page, type)
);
DEFINE_EVENT(f2fs__page, f2fs_set_page_dirty,
@@ -705,6 +783,70 @@
TP_ARGS(page, type)
);
+TRACE_EVENT(f2fs_writepages,
+
+ TP_PROTO(struct inode *inode, struct writeback_control *wbc, int type),
+
+ TP_ARGS(inode, wbc, type),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(int, type)
+ __field(int, dir)
+ __field(long, nr_to_write)
+ __field(long, pages_skipped)
+ __field(loff_t, range_start)
+ __field(loff_t, range_end)
+ __field(pgoff_t, writeback_index)
+ __field(int, sync_mode)
+ __field(char, for_kupdate)
+ __field(char, for_background)
+ __field(char, tagged_writepages)
+ __field(char, for_reclaim)
+ __field(char, range_cyclic)
+ __field(char, for_sync)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = inode->i_ino;
+ __entry->type = type;
+ __entry->dir = S_ISDIR(inode->i_mode);
+ __entry->nr_to_write = wbc->nr_to_write;
+ __entry->pages_skipped = wbc->pages_skipped;
+ __entry->range_start = wbc->range_start;
+ __entry->range_end = wbc->range_end;
+ __entry->writeback_index = inode->i_mapping->writeback_index;
+ __entry->sync_mode = wbc->sync_mode;
+ __entry->for_kupdate = wbc->for_kupdate;
+ __entry->for_background = wbc->for_background;
+ __entry->tagged_writepages = wbc->tagged_writepages;
+ __entry->for_reclaim = wbc->for_reclaim;
+ __entry->range_cyclic = wbc->range_cyclic;
+ __entry->for_sync = wbc->for_sync;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu, %s, %s, nr_to_write %ld, "
+ "skipped %ld, start %lld, end %lld, wb_idx %lu, sync_mode %d, "
+ "kupdate %u background %u tagged %u reclaim %u cyclic %u sync %u",
+ show_dev_ino(__entry),
+ show_block_type(__entry->type),
+ show_file_type(__entry->dir),
+ __entry->nr_to_write,
+ __entry->pages_skipped,
+ __entry->range_start,
+ __entry->range_end,
+ (unsigned long)__entry->writeback_index,
+ __entry->sync_mode,
+ __entry->for_kupdate,
+ __entry->for_background,
+ __entry->tagged_writepages,
+ __entry->for_reclaim,
+ __entry->range_cyclic,
+ __entry->for_sync)
+);
+
TRACE_EVENT(f2fs_submit_page_mbio,
TP_PROTO(struct page *page, int rw, int type, block_t blk_addr),