NFSv4.1/pnfs: Separate out metadata and data consistency for pNFS
The LAYOUTCOMMIT operation means different things to different layout types.
For blocks and objects, it is both a data and metadata consistency operation.
For files and flexfiles, it is only a metadata consistency operation.
This patch separates out the 2 cases, allowing the files/flexfiles layout
drivers to optimise away the data consistency calls to layoutcommit.
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 1cac3c1..d2554fe 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -890,6 +890,7 @@
.free_deviceid_node = bl_free_deviceid_node,
.pg_read_ops = &bl_pg_read_ops,
.pg_write_ops = &bl_pg_write_ops,
+ .sync = pnfs_generic_sync,
};
static int __init nfs4blocklayout_init(void)
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index a317b00..a46bf6d 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -1139,6 +1139,7 @@
.write_pagelist = filelayout_write_pagelist,
.alloc_deviceid_node = filelayout_alloc_deviceid_node,
.free_deviceid_node = filelayout_free_deviceid_node,
+ .sync = pnfs_nfs_generic_sync,
};
static int __init nfs4filelayout_init(void)
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 92d2943..f3ff66e 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1509,6 +1509,7 @@
.write_pagelist = ff_layout_write_pagelist,
.alloc_deviceid_node = ff_layout_alloc_deviceid_node,
.encode_layoutreturn = ff_layout_encode_layoutreturn,
+ .sync = pnfs_nfs_generic_sync,
};
static int __init nfs4flexfilelayout_init(void)
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index befe7a2..866842b 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -112,7 +112,7 @@
mutex_lock(&inode->i_mutex);
ret = nfs_file_fsync_commit(file, start, end, datasync);
if (!ret)
- ret = pnfs_layoutcommit_inode(inode, true);
+ ret = pnfs_sync_inode(inode, !!datasync);
mutex_unlock(&inode->i_mutex);
/*
* If nfs_file_fsync_commit detected a server reboot, then
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 8b5e0e6..5aaed36 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -637,6 +637,8 @@
.pg_read_ops = &objio_pg_read_ops,
.pg_write_ops = &objio_pg_write_ops,
+ .sync = pnfs_generic_sync,
+
.free_deviceid_node = objio_free_deviceid_node,
.encode_layoutcommit = objlayout_encode_layoutcommit,
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index c2ce2db..3f0affe 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2231,6 +2231,13 @@
}
EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode);
+int
+pnfs_generic_sync(struct inode *inode, bool datasync)
+{
+ return pnfs_layoutcommit_inode(inode, true);
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_sync);
+
struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
{
struct nfs4_threshold *thp;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 66bf5e1..231eb23 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -155,6 +155,8 @@
int how,
struct nfs_commit_info *cinfo);
+ int (*sync)(struct inode *inode, bool datasync);
+
/*
* Return PNFS_ATTEMPTED to indicate the layout code has attempted
* I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
@@ -267,6 +269,8 @@
void pnfs_set_layoutcommit(struct inode *, struct pnfs_layout_segment *, loff_t);
void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
+int pnfs_generic_sync(struct inode *inode, bool datasync);
+int pnfs_nfs_generic_sync(struct inode *inode, bool datasync);
int _pnfs_return_layout(struct inode *);
int pnfs_commit_and_return_layout(struct inode *);
void pnfs_ld_write_done(struct nfs_pgio_header *);
@@ -488,6 +492,14 @@
return NFS_SERVER(inode)->pnfs_curr_ld->flags & PNFS_READ_WHOLE_PAGE;
}
+static inline int
+pnfs_sync_inode(struct inode *inode, bool datasync)
+{
+ if (!pnfs_enabled_sb(NFS_SERVER(inode)))
+ return 0;
+ return NFS_SERVER(inode)->pnfs_curr_ld->sync(inode, datasync);
+}
+
static inline bool
pnfs_layoutcommit_outstanding(struct inode *inode)
{
@@ -570,6 +582,12 @@
return false;
}
+static inline int
+pnfs_sync_inode(struct inode *inode, bool datasync)
+{
+ return 0;
+}
+
static inline bool
pnfs_roc(struct inode *ino)
{
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 54e36b3..64d2a59 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -868,3 +868,13 @@
nfs_request_add_commit_list(req, list, cinfo);
}
EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit);
+
+int
+pnfs_nfs_generic_sync(struct inode *inode, bool datasync)
+{
+ if (datasync)
+ return 0;
+ return pnfs_layoutcommit_inode(inode, true);
+}
+EXPORT_SYMBOL_GPL(pnfs_nfs_generic_sync);
+
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 849ed78..7f933a3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1840,17 +1840,16 @@
*/
int nfs_wb_all(struct inode *inode)
{
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = LONG_MAX,
- .range_start = 0,
- .range_end = LLONG_MAX,
- };
int ret;
trace_nfs_writeback_inode_enter(inode);
- ret = sync_inode(inode, &wbc);
+ ret = filemap_write_and_wait(inode->i_mapping);
+ if (!ret) {
+ ret = nfs_commit_inode(inode, FLUSH_SYNC);
+ if (!ret)
+ pnfs_sync_inode(inode, true);
+ }
trace_nfs_writeback_inode_exit(inode, ret);
return ret;