ocfs2: Use own splice write actor

We need to fill holes during a splice write. Provide our own splice write
actor which can call ocfs2_file_buffered_write() with a splice-specific
callback.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 605c82a..014f4f5 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -25,6 +25,7 @@
 #include <linux/pagemap.h>
 #include <asm/byteorder.h>
 #include <linux/swap.h>
+#include <linux/pipe_fs_i.h>
 
 #define MLOG_MASK_PREFIX ML_FILE_IO
 #include <cluster/masklog.h>
@@ -749,6 +750,74 @@
 }
 
 /*
+ * This will copy user data from the buffer page in the splice
+ * context.
+ *
+ * For now, we ignore SPLICE_F_MOVE as that would require some extra
+ * communication out all the way to ocfs2_write().
+ */
+int ocfs2_map_and_write_splice_data(struct inode *inode,
+				  struct ocfs2_write_ctxt *wc, u64 *p_blkno,
+				  unsigned int *ret_from, unsigned int *ret_to)
+{
+	int ret;
+	unsigned int to, from, cluster_start, cluster_end;
+	char *src, *dst;
+	struct ocfs2_splice_write_priv *sp = wc->w_private;
+	struct pipe_buffer *buf = sp->s_buf;
+	unsigned long bytes, src_from;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+	ocfs2_figure_cluster_boundaries(osb, wc->w_cpos, &cluster_start,
+					&cluster_end);
+
+	from = sp->s_offset;
+	src_from = sp->s_buf_offset;
+	bytes = wc->w_count;
+
+	if (wc->w_large_pages) {
+		/*
+		 * For cluster size < page size, we have to
+		 * calculate pos within the cluster and obey
+		 * the rightmost boundary.
+		 */
+		bytes = min(bytes, (unsigned long)(osb->s_clustersize
+				   - (wc->w_pos & (osb->s_clustersize - 1))));
+	}
+	to = from + bytes;
+
+	if (wc->w_this_page_new)
+		ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
+					    cluster_start, cluster_end, 1);
+	else
+		ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
+					    from, to, 0);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	BUG_ON(from > PAGE_CACHE_SIZE);
+	BUG_ON(to > PAGE_CACHE_SIZE);
+	BUG_ON(from > osb->s_clustersize);
+	BUG_ON(to > osb->s_clustersize);
+
+	src = buf->ops->map(sp->s_pipe, buf, 1);
+	dst = kmap_atomic(wc->w_this_page, KM_USER1);
+	memcpy(dst + from, src + src_from, bytes);
+	kunmap_atomic(wc->w_this_page, KM_USER1);
+	buf->ops->unmap(sp->s_pipe, buf, src);
+
+	wc->w_finished_copy = 1;
+
+	*ret_from = from;
+	*ret_to = to;
+out:
+
+	return bytes ? (unsigned int)bytes : ret;
+}
+
+/*
  * This will copy user data from the iovec in the buffered write
  * context.
  */
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 7d94071..1b4ba53 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -80,6 +80,20 @@
 				  unsigned int *ret_from,
 				  unsigned int *ret_to);
 
+struct ocfs2_splice_write_priv {
+	struct splice_desc		*s_sd;
+	struct pipe_buffer		*s_buf;
+	struct pipe_inode_info		*s_pipe;
+	/* Neither offset value is ever larger than one page */
+	unsigned int			s_offset;
+	unsigned int			s_buf_offset;
+};
+int ocfs2_map_and_write_splice_data(struct inode *inode,
+				    struct ocfs2_write_ctxt *wc,
+				    u64 *p_blkno,
+				    unsigned int *ret_from,
+				    unsigned int *ret_to);
+
 /* all ocfs2_dio_end_io()'s fault */
 #define ocfs2_iocb_is_rw_locked(iocb) \
 	test_bit(0, (unsigned long *)&iocb->private)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 5fd49ec..f516619 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1603,6 +1603,84 @@
 	return written ? written : ret;
 }
 
+static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe,
+				    struct pipe_buffer *buf,
+				    struct splice_desc *sd)
+{
+	int ret, count, total = 0;
+	ssize_t copied = 0;
+	struct ocfs2_splice_write_priv sp;
+
+	ret = buf->ops->pin(pipe, buf);
+	if (ret)
+		goto out;
+
+	sp.s_sd = sd;
+	sp.s_buf = buf;
+	sp.s_pipe = pipe;
+	sp.s_offset = sd->pos & ~PAGE_CACHE_MASK;
+	sp.s_buf_offset = buf->offset;
+
+	count = sd->len;
+	if (count + sp.s_offset > PAGE_CACHE_SIZE)
+		count = PAGE_CACHE_SIZE - sp.s_offset;
+
+	do {
+		/*
+		 * splice wants us to copy up to one page at a
+		 * time. For pagesize > cluster size, this means we
+		 * might enter ocfs2_buffered_write_cluster() more
+		 * than once, so keep track of our progress here.
+		 */
+		copied = ocfs2_buffered_write_cluster(sd->file,
+						      (loff_t)sd->pos + total,
+						      count,
+						      ocfs2_map_and_write_splice_data,
+						      &sp);
+		if (copied < 0) {
+			mlog_errno(copied);
+			ret = copied;
+			goto out;
+		}
+
+		count -= copied;
+		sp.s_offset += copied;
+		sp.s_buf_offset += copied;
+		total += copied;
+	} while (count);
+
+	ret = 0;
+out:
+
+	return total ? total : ret;
+}
+
+static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe,
+					 struct file *out,
+					 loff_t *ppos,
+					 size_t len,
+					 unsigned int flags)
+{
+	int ret, err;
+	struct address_space *mapping = out->f_mapping;
+	struct inode *inode = mapping->host;
+
+	ret = __splice_from_pipe(pipe, out, ppos, len, flags,
+				 ocfs2_splice_write_actor);
+	if (ret > 0) {
+		*ppos += ret;
+
+		if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
+			err = generic_osync_inode(inode, mapping,
+						  OSYNC_METADATA|OSYNC_DATA);
+			if (err)
+				ret = err;
+		}
+	}
+
+	return ret;
+}
+
 static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
 				       struct file *out,
 				       loff_t *ppos,
@@ -1633,7 +1711,7 @@
 	}
 
 	/* ok, we're done with i_size and alloc work */
-	ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags);
+	ret = __ocfs2_file_splice_write(pipe, out, ppos, len, flags);
 
 out_unlock:
 	ocfs2_rw_unlock(inode, 1);