NFS: Remove pNFS bloat from the generic write path

We have no business doing any this in the standard write release path.
Get rid of it, and put it in the pNFS layer.

Also, while we're at it, get rid of the completely bogus unlock/relock
semantics that were present in nfs_writeback_release_full(). It is
not only unnecessary, but actually dangerous to release the write lock
just in order to take it again in nfs_page_async_flush(). Better just
to open code the pgio operations in a pnfs helper.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 3f4d957..5ee9253 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -307,6 +307,8 @@
 /* write.c */
 extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
 		struct list_head *head);
+extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
+				  struct inode *inode, int ioflags);
 extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
 extern void nfs_writedata_release(struct nfs_write_data *wdata);
 extern void nfs_commit_free(struct nfs_write_data *p);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index f881a63..17149a4 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1166,6 +1166,33 @@
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
 
+static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head)
+{
+	struct nfs_pageio_descriptor pgio;
+	LIST_HEAD(failed);
+
+	/* Resend all requests through the MDS */
+	nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE);
+	while (!list_empty(head)) {
+		struct nfs_page *req = nfs_list_entry(head->next);
+
+		nfs_list_remove_request(req);
+		if (!nfs_pageio_add_request(&pgio, req))
+			nfs_list_add_request(req, &failed);
+	}
+	nfs_pageio_complete(&pgio);
+
+	if (!list_empty(&failed)) {
+		/* For some reason our attempt to resend pages. Mark the
+		 * overall send request as having failed, and let
+		 * nfs_writeback_release_full deal with the error.
+		 */
+		list_move(&failed, head);
+		return -EIO;
+	}
+	return 0;
+}
+
 /*
  * Called by non rpc-based layout drivers
  */
@@ -1175,8 +1202,6 @@
 		pnfs_set_layoutcommit(data);
 		data->mds_ops->rpc_call_done(&data->task, data);
 	} else {
-		put_lseg(data->lseg);
-		data->lseg = NULL;
 		dprintk("pnfs write error = %d\n", data->pnfs_error);
 		if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
 						PNFS_LAYOUTRET_ON_ERROR) {
@@ -1187,6 +1212,7 @@
 				  &NFS_I(data->inode)->flags);
 			pnfs_return_layout(data->inode);
 		}
+		data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages);
 	}
 	data->mds_ops->rpc_release(data);
 }
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 1dda78d..0c38852 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1052,7 +1052,7 @@
 	.pg_doio = nfs_generic_pg_writepages,
 };
 
-static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
+void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
 				  struct inode *inode, int ioflags)
 {
 	nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops,
@@ -1166,13 +1166,7 @@
 static void nfs_writeback_release_full(void *calldata)
 {
 	struct nfs_write_data	*data = calldata;
-	int ret, status = data->task.tk_status;
-	struct nfs_pageio_descriptor pgio;
-
-	if (data->pnfs_error) {
-		nfs_pageio_init_write_mds(&pgio, data->inode, FLUSH_STABLE);
-		pgio.pg_recoalesce = 1;
-	}
+	int status = data->task.tk_status;
 
 	/* Update attributes as result of writeback. */
 	while (!list_empty(&data->pages)) {
@@ -1188,11 +1182,6 @@
 			req->wb_bytes,
 			(long long)req_offset(req));
 
-		if (data->pnfs_error) {
-			dprintk(", pnfs error = %d\n", data->pnfs_error);
-			goto next;
-		}
-
 		if (status < 0) {
 			nfs_set_pageerror(page);
 			nfs_context_set_write_error(req->wb_context, status);
@@ -1212,19 +1201,7 @@
 	next:
 		nfs_clear_page_tag_locked(req);
 		nfs_end_page_writeback(page);
-		if (data->pnfs_error) {
-			lock_page(page);
-			nfs_pageio_cond_complete(&pgio, page->index);
-			ret = nfs_page_async_flush(&pgio, page, 0);
-			if (ret) {
-				nfs_set_pageerror(page);
-				dprintk("rewrite to MDS error = %d\n", ret);
-			}
-			unlock_page(page);
-		}
 	}
-	if (data->pnfs_error)
-		nfs_pageio_complete(&pgio);
 	nfs_writedata_release(calldata);
 }