NFSv4.1/pnfs: Separate out metadata and data consistency for pNFS

The LAYOUTCOMMIT operation means different things to different layout types.
For blocks and objects, it is both a data and metadata consistency operation.
For files and flexfiles, it is only a metadata consistency operation.

This patch separates the two cases by adding a per-layout-driver ->sync()
operation, allowing the files/flexfiles layout drivers to optimise away the
calls to layoutcommit that are only needed for data consistency.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 1cac3c1..d2554fe 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -890,6 +890,7 @@
 	.free_deviceid_node		= bl_free_deviceid_node,
 	.pg_read_ops			= &bl_pg_read_ops,
 	.pg_write_ops			= &bl_pg_write_ops,
+	.sync				= pnfs_generic_sync,
 };
 
 static int __init nfs4blocklayout_init(void)
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index a317b00..a46bf6d 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -1139,6 +1139,7 @@
 	.write_pagelist		= filelayout_write_pagelist,
 	.alloc_deviceid_node	= filelayout_alloc_deviceid_node,
 	.free_deviceid_node	= filelayout_free_deviceid_node,
+	.sync			= pnfs_nfs_generic_sync,
 };
 
 static int __init nfs4filelayout_init(void)
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 92d2943..f3ff66e 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1509,6 +1509,7 @@
 	.write_pagelist		= ff_layout_write_pagelist,
 	.alloc_deviceid_node    = ff_layout_alloc_deviceid_node,
 	.encode_layoutreturn    = ff_layout_encode_layoutreturn,
+	.sync			= pnfs_nfs_generic_sync,
 };
 
 static int __init nfs4flexfilelayout_init(void)
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index befe7a2..866842b 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -112,7 +112,7 @@
 		mutex_lock(&inode->i_mutex);
 		ret = nfs_file_fsync_commit(file, start, end, datasync);
 		if (!ret)
-			ret = pnfs_layoutcommit_inode(inode, true);
+			ret = pnfs_sync_inode(inode, !!datasync);
 		mutex_unlock(&inode->i_mutex);
 		/*
 		 * If nfs_file_fsync_commit detected a server reboot, then
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 8b5e0e6..5aaed36 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -637,6 +637,8 @@
 	.pg_read_ops             = &objio_pg_read_ops,
 	.pg_write_ops            = &objio_pg_write_ops,
 
+	.sync			 = pnfs_generic_sync,
+
 	.free_deviceid_node	 = objio_free_deviceid_node,
 
 	.encode_layoutcommit	 = objlayout_encode_layoutcommit,
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index c2ce2db..3f0affe 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2231,6 +2231,13 @@
 }
 EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode);
 
+/* ->sync() for blocks/objects: always LAYOUTCOMMIT; @datasync is ignored. */
+int pnfs_generic_sync(struct inode *inode, bool datasync)
+{
+	return pnfs_layoutcommit_inode(inode, true);
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_sync);
+
 struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
 {
 	struct nfs4_threshold *thp;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 66bf5e1..231eb23 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -155,6 +155,8 @@
 			       int how,
 			       struct nfs_commit_info *cinfo);
 
+	int (*sync)(struct inode *inode, bool datasync);
+
 	/*
 	 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
 	 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
@@ -267,6 +269,8 @@
 void pnfs_set_layoutcommit(struct inode *, struct pnfs_layout_segment *, loff_t);
 void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
 int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
+int pnfs_generic_sync(struct inode *inode, bool datasync);
+int pnfs_nfs_generic_sync(struct inode *inode, bool datasync);
 int _pnfs_return_layout(struct inode *);
 int pnfs_commit_and_return_layout(struct inode *);
 void pnfs_ld_write_done(struct nfs_pgio_header *);
@@ -488,6 +492,14 @@
 	return NFS_SERVER(inode)->pnfs_curr_ld->flags & PNFS_READ_WHOLE_PAGE;
 }
 
+/* Run the layout driver's ->sync(); returns 0 when pNFS is not enabled. */
+static inline int pnfs_sync_inode(struct inode *inode, bool datasync)
+{
+	if (pnfs_enabled_sb(NFS_SERVER(inode)))
+		return NFS_SERVER(inode)->pnfs_curr_ld->sync(inode, datasync);
+	return 0;
+}
+
 static inline bool
 pnfs_layoutcommit_outstanding(struct inode *inode)
 {
@@ -570,6 +582,12 @@
 	return false;
 }
 
+/* Stub: presumably the build without pNFS (confirm); nothing to sync. */
+static inline int pnfs_sync_inode(struct inode *inode, bool datasync)
+{
+	return 0;
+}
+
 static inline bool
 pnfs_roc(struct inode *ino)
 {
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 54e36b3..64d2a59 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -868,3 +868,13 @@
 	nfs_request_add_commit_list(req, list, cinfo);
 }
 EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit);
+
+/* ->sync() for files/flexfiles: LAYOUTCOMMIT only when !@datasync. */
+int pnfs_nfs_generic_sync(struct inode *inode, bool datasync)
+{
+	if (!datasync)
+		return pnfs_layoutcommit_inode(inode, true);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_nfs_generic_sync);
+
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 849ed78..7f933a3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1840,17 +1840,20 @@
  */
 int nfs_wb_all(struct inode *inode)
 {
-	struct writeback_control wbc = {
-		.sync_mode = WB_SYNC_ALL,
-		.nr_to_write = LONG_MAX,
-		.range_start = 0,
-		.range_end = LLONG_MAX,
-	};
 	int ret;
 
 	trace_nfs_writeback_inode_enter(inode);
 
-	ret = sync_inode(inode, &wbc);
+	ret = filemap_write_and_wait(inode->i_mapping);
+	if (ret)
+		goto out;
+	/* A positive return is a count of committed requests, not an error */
+	ret = nfs_commit_inode(inode, FLUSH_SYNC);
+	if (ret < 0)
+		goto out;
+	pnfs_sync_inode(inode, true);
+	ret = 0;
 
+out:
 	trace_nfs_writeback_inode_exit(inode, ret);
 	return ret;