[PATCH] introduce a "kernel-internal pipe object" abstraction

separate out the 'internal pipe object' abstraction, and make it
usable to splice. This cleans up and fixes several aspects of the
internal splice APIs and the pipe code:

 - pipes: the allocation and freeing of pipe_inode_info is now more symmetric
   and more streamlined with existing kernel practices.

 - splice: small micro-optimization: less pointer dereferencing in splice
   methods

Signed-off-by: Ingo Molnar <mingo@elte.hu>

Update XFS for the ->splice_read/->splice_write changes.

Signed-off-by: Jens Axboe <axboe@suse.de>
diff --git a/fs/fifo.c b/fs/fifo.c
index 889f722..b16e2f5 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -15,12 +15,13 @@
 #include <linux/fs.h>
 #include <linux/pipe_fs_i.h>
 
-static void wait_for_partner(struct inode* inode, unsigned int* cnt)
+static void wait_for_partner(struct inode* inode, unsigned int *cnt)
 {
 	int cur = *cnt;	
-	while(cur == *cnt) {
-		pipe_wait(inode);
-		if(signal_pending(current))
+
+	while (cur == *cnt) {
+		pipe_wait(inode->i_pipe);
+		if (signal_pending(current))
 			break;
 	}
 }
@@ -37,7 +38,8 @@
 	mutex_lock(PIPE_MUTEX(*inode));
 	if (!inode->i_pipe) {
 		ret = -ENOMEM;
-		if(!pipe_new(inode))
+		inode->i_pipe = alloc_pipe_info(inode);
+		if (!inode->i_pipe)
 			goto err_nocleanup;
 	}
 	filp->f_version = 0;
diff --git a/fs/pipe.c b/fs/pipe.c
index 795df98..705b486 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -36,7 +36,7 @@
  */
 
 /* Drop the inode semaphore and wait for a pipe event, atomically */
-void pipe_wait(struct inode * inode)
+void pipe_wait(struct pipe_inode_info *pipe)
 {
 	DEFINE_WAIT(wait);
 
@@ -44,11 +44,13 @@
 	 * Pipes are system-local resources, so sleeping on them
 	 * is considered a noninteractive wait:
 	 */
-	prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
-	mutex_unlock(PIPE_MUTEX(*inode));
+	prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
+	if (pipe->inode)
+		mutex_unlock(&pipe->inode->i_mutex);
 	schedule();
-	finish_wait(PIPE_WAIT(*inode), &wait);
-	mutex_lock(PIPE_MUTEX(*inode));
+	finish_wait(&pipe->wait, &wait);
+	if (pipe->inode)
+		mutex_lock(&pipe->inode->i_mutex);
 }
 
 static int
@@ -223,7 +225,7 @@
 			wake_up_interruptible_sync(PIPE_WAIT(*inode));
  			kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
 		}
-		pipe_wait(inode);
+		pipe_wait(inode->i_pipe);
 	}
 	mutex_unlock(PIPE_MUTEX(*inode));
 	/* Signal writers asynchronously that there is more room.  */
@@ -370,7 +372,7 @@
 			do_wakeup = 0;
 		}
 		PIPE_WAITING_WRITERS(*inode)++;
-		pipe_wait(inode);
+		pipe_wait(inode->i_pipe);
 		PIPE_WAITING_WRITERS(*inode)--;
 	}
 out:
@@ -675,6 +677,20 @@
 	.fasync		= pipe_rdwr_fasync,
 };
 
+struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
+{
+	struct pipe_inode_info *info;
+
+	info = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
+	if (info) {
+		init_waitqueue_head(&info->wait);
+		info->r_counter = info->w_counter = 1;
+		info->inode = inode;
+	}
+
+	return info;
+}
+
 void free_pipe_info(struct inode *inode)
 {
 	int i;
@@ -691,23 +707,6 @@
 	kfree(info);
 }
 
-struct inode* pipe_new(struct inode* inode)
-{
-	struct pipe_inode_info *info;
-
-	info = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
-	if (!info)
-		goto fail_page;
-	inode->i_pipe = info;
-
-	init_waitqueue_head(PIPE_WAIT(*inode));
-	PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
-
-	return inode;
-fail_page:
-	return NULL;
-}
-
 static struct vfsmount *pipe_mnt __read_mostly;
 static int pipefs_delete_dentry(struct dentry *dentry)
 {
@@ -724,8 +723,10 @@
 	if (!inode)
 		goto fail_inode;
 
-	if(!pipe_new(inode))
+	inode->i_pipe = alloc_pipe_info(inode);
+	if (!inode->i_pipe)
 		goto fail_iput;
+
 	PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
 	inode->i_fop = &rdwr_pipe_fops;
 
diff --git a/fs/splice.c b/fs/splice.c
index 9bfd6af..ed91a62 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -136,34 +136,33 @@
  * Pipe output worker. This sets up our pipe format with the page cache
  * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
  */
-static ssize_t move_to_pipe(struct inode *inode, struct page **pages,
+static ssize_t move_to_pipe(struct pipe_inode_info *pipe, struct page **pages,
 			    int nr_pages, unsigned long offset,
 			    unsigned long len, unsigned int flags)
 {
-	struct pipe_inode_info *info;
 	int ret, do_wakeup, i;
 
 	ret = 0;
 	do_wakeup = 0;
 	i = 0;
 
-	mutex_lock(PIPE_MUTEX(*inode));
+	if (pipe->inode)
+		mutex_lock(&pipe->inode->i_mutex);
 
-	info = inode->i_pipe;
 	for (;;) {
 		int bufs;
 
-		if (!PIPE_READERS(*inode)) {
+		if (!pipe->readers) {
 			send_sig(SIGPIPE, current, 0);
 			if (!ret)
 				ret = -EPIPE;
 			break;
 		}
 
-		bufs = info->nrbufs;
+		bufs = pipe->nrbufs;
 		if (bufs < PIPE_BUFFERS) {
-			int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS - 1);
-			struct pipe_buffer *buf = info->bufs + newbuf;
+			int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS - 1);
+			struct pipe_buffer *buf = pipe->bufs + newbuf;
 			struct page *page = pages[i++];
 			unsigned long this_len;
 
@@ -175,7 +174,7 @@
 			buf->offset = offset;
 			buf->len = this_len;
 			buf->ops = &page_cache_pipe_buf_ops;
-			info->nrbufs = ++bufs;
+			pipe->nrbufs = ++bufs;
 			do_wakeup = 1;
 
 			ret += this_len;
@@ -205,25 +204,25 @@
 
 		if (do_wakeup) {
 			smp_mb();
-			if (waitqueue_active(PIPE_WAIT(*inode)))
-				wake_up_interruptible_sync(PIPE_WAIT(*inode));
-			kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO,
-				    POLL_IN);
+			if (waitqueue_active(&pipe->wait))
+				wake_up_interruptible_sync(&pipe->wait);
+			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 			do_wakeup = 0;
 		}
 
-		PIPE_WAITING_WRITERS(*inode)++;
-		pipe_wait(inode);
-		PIPE_WAITING_WRITERS(*inode)--;
+		pipe->waiting_writers++;
+		pipe_wait(pipe);
+		pipe->waiting_writers--;
 	}
 
-	mutex_unlock(PIPE_MUTEX(*inode));
+	if (pipe->inode)
+		mutex_unlock(&pipe->inode->i_mutex);
 
 	if (do_wakeup) {
 		smp_mb();
-		if (waitqueue_active(PIPE_WAIT(*inode)))
-			wake_up_interruptible(PIPE_WAIT(*inode));
-		kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
+		if (waitqueue_active(&pipe->wait))
+			wake_up_interruptible(&pipe->wait);
+		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 	}
 
 	while (i < nr_pages)
@@ -232,8 +231,9 @@
 	return ret;
 }
 
-static int __generic_file_splice_read(struct file *in, struct inode *pipe,
-				      size_t len, unsigned int flags)
+static int
+__generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe,
+			   size_t len, unsigned int flags)
 {
 	struct address_space *mapping = in->f_mapping;
 	unsigned int offset, nr_pages;
@@ -298,7 +298,7 @@
  * Will read pages from given file and fill them into a pipe.
  *
  */
-ssize_t generic_file_splice_read(struct file *in, struct inode *pipe,
+ssize_t generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe,
 				 size_t len, unsigned int flags)
 {
 	ssize_t spliced;
@@ -306,6 +306,7 @@
 
 	ret = 0;
 	spliced = 0;
+
 	while (len) {
 		ret = __generic_file_splice_read(in, pipe, len, flags);
 
@@ -509,11 +510,10 @@
  * key here is the 'actor' worker passed in that actually moves the data
  * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
  */
-static ssize_t move_from_pipe(struct inode *inode, struct file *out,
+static ssize_t move_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 			      size_t len, unsigned int flags,
 			      splice_actor *actor)
 {
-	struct pipe_inode_info *info;
 	int ret, do_wakeup, err;
 	struct splice_desc sd;
 
@@ -525,22 +525,22 @@
 	sd.file = out;
 	sd.pos = out->f_pos;
 
-	mutex_lock(PIPE_MUTEX(*inode));
+	if (pipe->inode)
+		mutex_lock(&pipe->inode->i_mutex);
 
-	info = inode->i_pipe;
 	for (;;) {
-		int bufs = info->nrbufs;
+		int bufs = pipe->nrbufs;
 
 		if (bufs) {
-			int curbuf = info->curbuf;
-			struct pipe_buffer *buf = info->bufs + curbuf;
+			int curbuf = pipe->curbuf;
+			struct pipe_buffer *buf = pipe->bufs + curbuf;
 			struct pipe_buf_operations *ops = buf->ops;
 
 			sd.len = buf->len;
 			if (sd.len > sd.total_len)
 				sd.len = sd.total_len;
 
-			err = actor(info, buf, &sd);
+			err = actor(pipe, buf, &sd);
 			if (err) {
 				if (!ret && err != -ENODATA)
 					ret = err;
@@ -553,10 +553,10 @@
 			buf->len -= sd.len;
 			if (!buf->len) {
 				buf->ops = NULL;
-				ops->release(info, buf);
+				ops->release(pipe, buf);
 				curbuf = (curbuf + 1) & (PIPE_BUFFERS - 1);
-				info->curbuf = curbuf;
-				info->nrbufs = --bufs;
+				pipe->curbuf = curbuf;
+				pipe->nrbufs = --bufs;
 				do_wakeup = 1;
 			}
 
@@ -568,9 +568,9 @@
 
 		if (bufs)
 			continue;
-		if (!PIPE_WRITERS(*inode))
+		if (!pipe->writers)
 			break;
-		if (!PIPE_WAITING_WRITERS(*inode)) {
+		if (!pipe->waiting_writers) {
 			if (ret)
 				break;
 		}
@@ -589,22 +589,23 @@
 
 		if (do_wakeup) {
 			smp_mb();
-			if (waitqueue_active(PIPE_WAIT(*inode)))
-				wake_up_interruptible_sync(PIPE_WAIT(*inode));
-			kill_fasync(PIPE_FASYNC_WRITERS(*inode),SIGIO,POLL_OUT);
+			if (waitqueue_active(&pipe->wait))
+				wake_up_interruptible_sync(&pipe->wait);
+			kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 			do_wakeup = 0;
 		}
 
-		pipe_wait(inode);
+		pipe_wait(pipe);
 	}
 
-	mutex_unlock(PIPE_MUTEX(*inode));
+	if (pipe->inode)
+		mutex_unlock(&pipe->inode->i_mutex);
 
 	if (do_wakeup) {
 		smp_mb();
-		if (waitqueue_active(PIPE_WAIT(*inode)))
-			wake_up_interruptible(PIPE_WAIT(*inode));
-		kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
+		if (waitqueue_active(&pipe->wait))
+			wake_up_interruptible(&pipe->wait);
+		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 	}
 
 	mutex_lock(&out->f_mapping->host->i_mutex);
@@ -616,7 +617,7 @@
 
 /**
  * generic_file_splice_write - splice data from a pipe to a file
- * @inode:	pipe inode
+ * @pipe:	pipe info
  * @out:	file to write to
  * @len:	number of bytes to splice
  * @flags:	splice modifier flags
@@ -625,11 +626,14 @@
  * the given pipe inode to the given file.
  *
  */
-ssize_t generic_file_splice_write(struct inode *inode, struct file *out,
-				  size_t len, unsigned int flags)
+ssize_t
+generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
+			  size_t len, unsigned int flags)
 {
 	struct address_space *mapping = out->f_mapping;
-	ssize_t ret = move_from_pipe(inode, out, len, flags, pipe_to_file);
+	ssize_t ret;
+
+	ret = move_from_pipe(pipe, out, len, flags, pipe_to_file);
 
 	/*
 	 * if file or inode is SYNC and we actually wrote some data, sync it
@@ -664,10 +668,10 @@
  * is involved.
  *
  */
-ssize_t generic_splice_sendpage(struct inode *inode, struct file *out,
+ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
 				size_t len, unsigned int flags)
 {
-	return move_from_pipe(inode, out, len, flags, pipe_to_sendpage);
+	return move_from_pipe(pipe, out, len, flags, pipe_to_sendpage);
 }
 
 EXPORT_SYMBOL(generic_splice_sendpage);
@@ -675,8 +679,8 @@
 /*
  * Attempt to initiate a splice from pipe to file.
  */
-static long do_splice_from(struct inode *pipe, struct file *out, size_t len,
-			   unsigned int flags)
+static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
+			   size_t len, unsigned int flags)
 {
 	loff_t pos;
 	int ret;
@@ -698,8 +702,8 @@
 /*
  * Attempt to initiate a splice from a file to a pipe.
  */
-static long do_splice_to(struct file *in, struct inode *pipe, size_t len,
-			 unsigned int flags)
+static long do_splice_to(struct file *in, struct pipe_inode_info *pipe,
+			 size_t len, unsigned int flags)
 {
 	loff_t pos, isize, left;
 	int ret;
@@ -732,14 +736,14 @@
 static long do_splice(struct file *in, struct file *out, size_t len,
 		      unsigned int flags)
 {
-	struct inode *pipe;
+	struct pipe_inode_info *pipe;
 
-	pipe = in->f_dentry->d_inode;
-	if (pipe->i_pipe)
+	pipe = in->f_dentry->d_inode->i_pipe;
+	if (pipe)
 		return do_splice_from(pipe, out, len, flags);
 
-	pipe = out->f_dentry->d_inode;
-	if (pipe->i_pipe)
+	pipe = out->f_dentry->d_inode->i_pipe;
+	if (pipe)
 		return do_splice_to(in, pipe, len, flags);
 
 	return -EINVAL;
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index ae4c475..269721a 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -252,7 +252,7 @@
 STATIC ssize_t
 xfs_file_splice_read(
 	struct file		*infilp,
-	struct inode		*pipe,
+	struct pipe_inode_info	*pipe,
 	size_t			len,
 	unsigned int		flags)
 {
@@ -266,7 +266,7 @@
 STATIC ssize_t
 xfs_file_splice_read_invis(
 	struct file		*infilp,
-	struct inode		*pipe,
+	struct pipe_inode_info	*pipe,
 	size_t			len,
 	unsigned int		flags)
 {
@@ -279,7 +279,7 @@
 
 STATIC ssize_t
 xfs_file_splice_write(
-	struct inode		*pipe,
+	struct pipe_inode_info	*pipe,
 	struct file		*outfilp,
 	size_t			len,
 	unsigned int		flags)
@@ -293,7 +293,7 @@
 
 STATIC ssize_t
 xfs_file_splice_write_invis(
-	struct inode		*pipe,
+	struct pipe_inode_info	*pipe,
 	struct file		*outfilp,
 	size_t			len,
 	unsigned int		flags)
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 90cd314..74a5293 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -338,7 +338,7 @@
 xfs_splice_read(
 	bhv_desc_t		*bdp,
 	struct file		*infilp,
-	struct inode		*pipe,
+	struct pipe_inode_info	*pipe,
 	size_t			count,
 	int			flags,
 	int			ioflags,
@@ -380,7 +380,7 @@
 ssize_t
 xfs_splice_write(
 	bhv_desc_t		*bdp,
-	struct inode		*pipe,
+	struct pipe_inode_info	*pipe,
 	struct file		*outfilp,
 	size_t			count,
 	int			flags,
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index eaa5659..55c689a 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -94,9 +94,9 @@
 				loff_t *, int, size_t, read_actor_t,
 				void *, struct cred *);
 extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *,
-				struct inode *, size_t, int, int,
+				struct pipe_inode_info *, size_t, int, int,
 				struct cred *);
-extern ssize_t xfs_splice_write(struct bhv_desc *, struct inode *,
+extern ssize_t xfs_splice_write(struct bhv_desc *, struct pipe_inode_info *,
 				struct file *, size_t, int, int,
 				struct cred *);
 
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 6f1c79a..88b09f1 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -174,9 +174,9 @@
 				loff_t *, int, size_t, read_actor_t,
 				void *, struct cred *);
 typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *,
-				struct inode *, size_t, int, int,
+				struct pipe_inode_info *, size_t, int, int,
 				struct cred *);
-typedef ssize_t (*vop_splice_write_t)(bhv_desc_t *, struct inode *,
+typedef ssize_t (*vop_splice_write_t)(bhv_desc_t *, struct pipe_inode_info *,
 				struct file *, size_t, int, int,
 				struct cred *);
 typedef int	(*vop_ioctl_t)(bhv_desc_t *, struct inode *, struct file *,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1e9ebab..7e64544 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1039,8 +1039,8 @@
 	int (*check_flags)(int);
 	int (*dir_notify)(struct file *filp, unsigned long arg);
 	int (*flock) (struct file *, int, struct file_lock *);
-	ssize_t (*splice_write)(struct inode *, struct file *, size_t, unsigned int);
-	ssize_t (*splice_read)(struct file *, struct inode *, size_t, unsigned int);
+	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int);
+	ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int);
 };
 
 struct inode_operations {
@@ -1611,8 +1611,8 @@
 extern void do_generic_mapping_read(struct address_space *mapping,
 				    struct file_ra_state *, struct file *,
 				    loff_t *, read_descriptor_t *, read_actor_t);
-extern ssize_t generic_file_splice_read(struct file *, struct inode *, size_t, unsigned int);
-extern ssize_t generic_file_splice_write(struct inode *, struct file *, size_t, unsigned int);
+extern ssize_t generic_file_splice_read(struct file *, struct pipe_inode_info *, size_t, unsigned int);
+extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, size_t, unsigned int);
 extern void
 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
 extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, 
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index ec38495..9cf99cb 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -36,6 +36,7 @@
 	unsigned int w_counter;
 	struct fasync_struct *fasync_readers;
 	struct fasync_struct *fasync_writers;
+	struct inode *inode;
 };
 
 /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
@@ -53,10 +54,10 @@
 #define PIPE_FASYNC_WRITERS(inode)     (&((inode).i_pipe->fasync_writers))
 
 /* Drop the inode semaphore and wait for a pipe event, atomically */
-void pipe_wait(struct inode * inode);
+void pipe_wait(struct pipe_inode_info *pipe);
 
-struct inode* pipe_new(struct inode* inode);
-void free_pipe_info(struct inode* inode);
+struct pipe_inode_info * alloc_pipe_info(struct inode * inode);
+void free_pipe_info(struct inode * inode);
 
 /*
  * splice is tied to pipes as a transport (at least for now), so we'll just