blob: 4384c929094354d84f27936b1b15c6001b9d9dd5 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/fs/pipe.c
3 *
4 * Copyright (C) 1991, 1992, 1999 Linus Torvalds
5 */
6
7#include <linux/mm.h>
8#include <linux/file.h>
9#include <linux/poll.h>
10#include <linux/slab.h>
11#include <linux/module.h>
12#include <linux/init.h>
13#include <linux/fs.h>
14#include <linux/mount.h>
15#include <linux/pipe_fs_i.h>
16#include <linux/uio.h>
17#include <linux/highmem.h>
18
19#include <asm/uaccess.h>
20#include <asm/ioctls.h>
21
22/*
23 * We use a start+len construction, which provides full use of the
24 * allocated memory.
25 * -- Florian Coosmann (FGC)
26 *
27 * Reads with count = 0 should always return 0.
28 * -- Julian Bradfield 1999-06-07.
29 *
30 * FIFOs and Pipes now generate SIGIO for both readers and writers.
31 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
32 *
33 * pipe_read & write cleanup
34 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
35 */
36
37/* Drop the inode semaphore and wait for a pipe event, atomically */
38void pipe_wait(struct inode * inode)
39{
40 DEFINE_WAIT(wait);
41
Ingo Molnard79fc0f2005-09-10 00:26:12 -070042 /*
43 * Pipes are system-local resources, so sleeping on them
44 * is considered a noninteractive wait:
45 */
46 prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
Jes Sorensen1b1dcc12006-01-09 15:59:24 -080047 mutex_unlock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -070048 schedule();
49 finish_wait(PIPE_WAIT(*inode), &wait);
Jes Sorensen1b1dcc12006-01-09 15:59:24 -080050 mutex_lock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -070051}
52
Arjan van de Ven858119e2006-01-14 13:20:43 -080053static int
Linus Torvalds1da177e2005-04-16 15:20:36 -070054pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
55{
56 unsigned long copy;
57
58 while (len > 0) {
59 while (!iov->iov_len)
60 iov++;
61 copy = min_t(unsigned long, len, iov->iov_len);
62
63 if (copy_from_user(to, iov->iov_base, copy))
64 return -EFAULT;
65 to += copy;
66 len -= copy;
67 iov->iov_base += copy;
68 iov->iov_len -= copy;
69 }
70 return 0;
71}
72
Arjan van de Ven858119e2006-01-14 13:20:43 -080073static int
Linus Torvalds1da177e2005-04-16 15:20:36 -070074pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
75{
76 unsigned long copy;
77
78 while (len > 0) {
79 while (!iov->iov_len)
80 iov++;
81 copy = min_t(unsigned long, len, iov->iov_len);
82
83 if (copy_to_user(iov->iov_base, from, copy))
84 return -EFAULT;
85 from += copy;
86 len -= copy;
87 iov->iov_base += copy;
88 iov->iov_len -= copy;
89 }
90 return 0;
91}
92
93static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf)
94{
95 struct page *page = buf->page;
96
97 if (info->tmp_page) {
98 __free_page(page);
99 return;
100 }
101 info->tmp_page = page;
102}
103
104static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf)
105{
106 return kmap(buf->page);
107}
108
109static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf)
110{
111 kunmap(buf->page);
112}
113
114static struct pipe_buf_operations anon_pipe_buf_ops = {
115 .can_merge = 1,
116 .map = anon_pipe_buf_map,
117 .unmap = anon_pipe_buf_unmap,
118 .release = anon_pipe_buf_release,
119};
120
121static ssize_t
122pipe_readv(struct file *filp, const struct iovec *_iov,
123 unsigned long nr_segs, loff_t *ppos)
124{
125 struct inode *inode = filp->f_dentry->d_inode;
126 struct pipe_inode_info *info;
127 int do_wakeup;
128 ssize_t ret;
129 struct iovec *iov = (struct iovec *)_iov;
130 size_t total_len;
131
132 total_len = iov_length(iov, nr_segs);
133 /* Null read succeeds. */
134 if (unlikely(total_len == 0))
135 return 0;
136
137 do_wakeup = 0;
138 ret = 0;
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800139 mutex_lock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140 info = inode->i_pipe;
141 for (;;) {
142 int bufs = info->nrbufs;
143 if (bufs) {
144 int curbuf = info->curbuf;
145 struct pipe_buffer *buf = info->bufs + curbuf;
146 struct pipe_buf_operations *ops = buf->ops;
147 void *addr;
148 size_t chars = buf->len;
149 int error;
150
151 if (chars > total_len)
152 chars = total_len;
153
154 addr = ops->map(filp, info, buf);
155 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
156 ops->unmap(info, buf);
157 if (unlikely(error)) {
158 if (!ret) ret = -EFAULT;
159 break;
160 }
161 ret += chars;
162 buf->offset += chars;
163 buf->len -= chars;
164 if (!buf->len) {
165 buf->ops = NULL;
166 ops->release(info, buf);
167 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
168 info->curbuf = curbuf;
169 info->nrbufs = --bufs;
170 do_wakeup = 1;
171 }
172 total_len -= chars;
173 if (!total_len)
174 break; /* common path: read succeeded */
175 }
176 if (bufs) /* More to do? */
177 continue;
178 if (!PIPE_WRITERS(*inode))
179 break;
180 if (!PIPE_WAITING_WRITERS(*inode)) {
181 /* syscall merging: Usually we must not sleep
182 * if O_NONBLOCK is set, or if we got some data.
183 * But if a writer sleeps in kernel space, then
184 * we can wait for that data without violating POSIX.
185 */
186 if (ret)
187 break;
188 if (filp->f_flags & O_NONBLOCK) {
189 ret = -EAGAIN;
190 break;
191 }
192 }
193 if (signal_pending(current)) {
194 if (!ret) ret = -ERESTARTSYS;
195 break;
196 }
197 if (do_wakeup) {
198 wake_up_interruptible_sync(PIPE_WAIT(*inode));
199 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
200 }
201 pipe_wait(inode);
202 }
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800203 mutex_unlock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204 /* Signal writers asynchronously that there is more room. */
205 if (do_wakeup) {
206 wake_up_interruptible(PIPE_WAIT(*inode));
207 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
208 }
209 if (ret > 0)
210 file_accessed(filp);
211 return ret;
212}
213
214static ssize_t
215pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
216{
217 struct iovec iov = { .iov_base = buf, .iov_len = count };
218 return pipe_readv(filp, &iov, 1, ppos);
219}
220
221static ssize_t
222pipe_writev(struct file *filp, const struct iovec *_iov,
223 unsigned long nr_segs, loff_t *ppos)
224{
225 struct inode *inode = filp->f_dentry->d_inode;
226 struct pipe_inode_info *info;
227 ssize_t ret;
228 int do_wakeup;
229 struct iovec *iov = (struct iovec *)_iov;
230 size_t total_len;
231 ssize_t chars;
232
233 total_len = iov_length(iov, nr_segs);
234 /* Null write succeeds. */
235 if (unlikely(total_len == 0))
236 return 0;
237
238 do_wakeup = 0;
239 ret = 0;
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800240 mutex_lock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241 info = inode->i_pipe;
242
243 if (!PIPE_READERS(*inode)) {
244 send_sig(SIGPIPE, current, 0);
245 ret = -EPIPE;
246 goto out;
247 }
248
249 /* We try to merge small writes */
250 chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
251 if (info->nrbufs && chars != 0) {
252 int lastbuf = (info->curbuf + info->nrbufs - 1) & (PIPE_BUFFERS-1);
253 struct pipe_buffer *buf = info->bufs + lastbuf;
254 struct pipe_buf_operations *ops = buf->ops;
255 int offset = buf->offset + buf->len;
256 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
257 void *addr = ops->map(filp, info, buf);
258 int error = pipe_iov_copy_from_user(offset + addr, iov, chars);
259 ops->unmap(info, buf);
260 ret = error;
261 do_wakeup = 1;
262 if (error)
263 goto out;
264 buf->len += chars;
265 total_len -= chars;
266 ret = chars;
267 if (!total_len)
268 goto out;
269 }
270 }
271
272 for (;;) {
273 int bufs;
274 if (!PIPE_READERS(*inode)) {
275 send_sig(SIGPIPE, current, 0);
276 if (!ret) ret = -EPIPE;
277 break;
278 }
279 bufs = info->nrbufs;
280 if (bufs < PIPE_BUFFERS) {
281 int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS-1);
282 struct pipe_buffer *buf = info->bufs + newbuf;
283 struct page *page = info->tmp_page;
284 int error;
285
286 if (!page) {
287 page = alloc_page(GFP_HIGHUSER);
288 if (unlikely(!page)) {
289 ret = ret ? : -ENOMEM;
290 break;
291 }
292 info->tmp_page = page;
293 }
294 /* Always wakeup, even if the copy fails. Otherwise
295 * we lock up (O_NONBLOCK-)readers that sleep due to
296 * syscall merging.
297 * FIXME! Is this really true?
298 */
299 do_wakeup = 1;
300 chars = PAGE_SIZE;
301 if (chars > total_len)
302 chars = total_len;
303
304 error = pipe_iov_copy_from_user(kmap(page), iov, chars);
305 kunmap(page);
306 if (unlikely(error)) {
307 if (!ret) ret = -EFAULT;
308 break;
309 }
310 ret += chars;
311
312 /* Insert it into the buffer array */
313 buf->page = page;
314 buf->ops = &anon_pipe_buf_ops;
315 buf->offset = 0;
316 buf->len = chars;
317 info->nrbufs = ++bufs;
318 info->tmp_page = NULL;
319
320 total_len -= chars;
321 if (!total_len)
322 break;
323 }
324 if (bufs < PIPE_BUFFERS)
325 continue;
326 if (filp->f_flags & O_NONBLOCK) {
327 if (!ret) ret = -EAGAIN;
328 break;
329 }
330 if (signal_pending(current)) {
331 if (!ret) ret = -ERESTARTSYS;
332 break;
333 }
334 if (do_wakeup) {
335 wake_up_interruptible_sync(PIPE_WAIT(*inode));
336 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
337 do_wakeup = 0;
338 }
339 PIPE_WAITING_WRITERS(*inode)++;
340 pipe_wait(inode);
341 PIPE_WAITING_WRITERS(*inode)--;
342 }
343out:
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800344 mutex_unlock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700345 if (do_wakeup) {
346 wake_up_interruptible(PIPE_WAIT(*inode));
347 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
348 }
349 if (ret > 0)
Christoph Hellwig870f4812006-01-09 20:52:01 -0800350 file_update_time(filp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700351 return ret;
352}
353
354static ssize_t
355pipe_write(struct file *filp, const char __user *buf,
356 size_t count, loff_t *ppos)
357{
358 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
359 return pipe_writev(filp, &iov, 1, ppos);
360}
361
362static ssize_t
363bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
364{
365 return -EBADF;
366}
367
368static ssize_t
369bad_pipe_w(struct file *filp, const char __user *buf, size_t count, loff_t *ppos)
370{
371 return -EBADF;
372}
373
374static int
375pipe_ioctl(struct inode *pino, struct file *filp,
376 unsigned int cmd, unsigned long arg)
377{
378 struct inode *inode = filp->f_dentry->d_inode;
379 struct pipe_inode_info *info;
380 int count, buf, nrbufs;
381
382 switch (cmd) {
383 case FIONREAD:
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800384 mutex_lock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385 info = inode->i_pipe;
386 count = 0;
387 buf = info->curbuf;
388 nrbufs = info->nrbufs;
389 while (--nrbufs >= 0) {
390 count += info->bufs[buf].len;
391 buf = (buf+1) & (PIPE_BUFFERS-1);
392 }
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800393 mutex_unlock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394 return put_user(count, (int __user *)arg);
395 default:
396 return -EINVAL;
397 }
398}
399
400/* No kernel lock held - fine */
401static unsigned int
402pipe_poll(struct file *filp, poll_table *wait)
403{
404 unsigned int mask;
405 struct inode *inode = filp->f_dentry->d_inode;
406 struct pipe_inode_info *info = inode->i_pipe;
407 int nrbufs;
408
409 poll_wait(filp, PIPE_WAIT(*inode), wait);
410
411 /* Reading only -- no need for acquiring the semaphore. */
412 nrbufs = info->nrbufs;
413 mask = 0;
414 if (filp->f_mode & FMODE_READ) {
415 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
416 if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
417 mask |= POLLHUP;
418 }
419
420 if (filp->f_mode & FMODE_WRITE) {
421 mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
Pekka Enberg5e5d7a22005-09-06 15:17:48 -0700422 /*
423 * Most Unices do not set POLLERR for FIFOs but on Linux they
424 * behave exactly like pipes for poll().
425 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700426 if (!PIPE_READERS(*inode))
427 mask |= POLLERR;
428 }
429
430 return mask;
431}
432
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433static int
434pipe_release(struct inode *inode, int decr, int decw)
435{
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800436 mutex_lock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437 PIPE_READERS(*inode) -= decr;
438 PIPE_WRITERS(*inode) -= decw;
439 if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
440 free_pipe_info(inode);
441 } else {
442 wake_up_interruptible(PIPE_WAIT(*inode));
443 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
444 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
445 }
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800446 mutex_unlock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447
448 return 0;
449}
450
451static int
452pipe_read_fasync(int fd, struct file *filp, int on)
453{
454 struct inode *inode = filp->f_dentry->d_inode;
455 int retval;
456
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800457 mutex_lock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700458 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800459 mutex_unlock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460
461 if (retval < 0)
462 return retval;
463
464 return 0;
465}
466
467
468static int
469pipe_write_fasync(int fd, struct file *filp, int on)
470{
471 struct inode *inode = filp->f_dentry->d_inode;
472 int retval;
473
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800474 mutex_lock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700475 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800476 mutex_unlock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700477
478 if (retval < 0)
479 return retval;
480
481 return 0;
482}
483
484
485static int
486pipe_rdwr_fasync(int fd, struct file *filp, int on)
487{
488 struct inode *inode = filp->f_dentry->d_inode;
489 int retval;
490
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800491 mutex_lock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492
493 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
494
495 if (retval >= 0)
496 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
497
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800498 mutex_unlock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700499
500 if (retval < 0)
501 return retval;
502
503 return 0;
504}
505
506
/*
 * release handler for read ends: remove this file from the reader
 * fasync list, then drop one reader from the pipe.
 */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	int rc;

	pipe_read_fasync(-1, filp, 0);
	rc = pipe_release(inode, 1, 0);
	return rc;
}
513
/*
 * release handler for write ends: remove this file from the writer
 * fasync list, then drop one writer from the pipe.
 */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	int rc;

	pipe_write_fasync(-1, filp, 0);
	rc = pipe_release(inode, 0, 1);
	return rc;
}
520
521static int
522pipe_rdwr_release(struct inode *inode, struct file *filp)
523{
524 int decr, decw;
525
526 pipe_rdwr_fasync(-1, filp, 0);
527 decr = (filp->f_mode & FMODE_READ) != 0;
528 decw = (filp->f_mode & FMODE_WRITE) != 0;
529 return pipe_release(inode, decr, decw);
530}
531
532static int
533pipe_read_open(struct inode *inode, struct file *filp)
534{
535 /* We could have perhaps used atomic_t, but this and friends
536 below are the only places. So it doesn't seem worthwhile. */
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800537 mutex_lock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700538 PIPE_READERS(*inode)++;
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800539 mutex_unlock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540
541 return 0;
542}
543
544static int
545pipe_write_open(struct inode *inode, struct file *filp)
546{
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800547 mutex_lock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548 PIPE_WRITERS(*inode)++;
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800549 mutex_unlock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700550
551 return 0;
552}
553
554static int
555pipe_rdwr_open(struct inode *inode, struct file *filp)
556{
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800557 mutex_lock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558 if (filp->f_mode & FMODE_READ)
559 PIPE_READERS(*inode)++;
560 if (filp->f_mode & FMODE_WRITE)
561 PIPE_WRITERS(*inode)++;
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800562 mutex_unlock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700563
564 return 0;
565}
566
567/*
568 * The file_operations structs are not static because they
569 * are also used in linux/fs/fifo.c to do operations on FIFOs.
570 */
571struct file_operations read_fifo_fops = {
572 .llseek = no_llseek,
573 .read = pipe_read,
574 .readv = pipe_readv,
575 .write = bad_pipe_w,
Pekka Enberg5e5d7a22005-09-06 15:17:48 -0700576 .poll = pipe_poll,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700577 .ioctl = pipe_ioctl,
578 .open = pipe_read_open,
579 .release = pipe_read_release,
580 .fasync = pipe_read_fasync,
581};
582
583struct file_operations write_fifo_fops = {
584 .llseek = no_llseek,
585 .read = bad_pipe_r,
586 .write = pipe_write,
587 .writev = pipe_writev,
Pekka Enberg5e5d7a22005-09-06 15:17:48 -0700588 .poll = pipe_poll,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700589 .ioctl = pipe_ioctl,
590 .open = pipe_write_open,
591 .release = pipe_write_release,
592 .fasync = pipe_write_fasync,
593};
594
595struct file_operations rdwr_fifo_fops = {
596 .llseek = no_llseek,
597 .read = pipe_read,
598 .readv = pipe_readv,
599 .write = pipe_write,
600 .writev = pipe_writev,
Pekka Enberg5e5d7a22005-09-06 15:17:48 -0700601 .poll = pipe_poll,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700602 .ioctl = pipe_ioctl,
603 .open = pipe_rdwr_open,
604 .release = pipe_rdwr_release,
605 .fasync = pipe_rdwr_fasync,
606};
607
Linus Torvaldsa19cbd42006-03-08 14:03:09 -0800608static struct file_operations read_pipe_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609 .llseek = no_llseek,
610 .read = pipe_read,
611 .readv = pipe_readv,
612 .write = bad_pipe_w,
613 .poll = pipe_poll,
614 .ioctl = pipe_ioctl,
615 .open = pipe_read_open,
616 .release = pipe_read_release,
617 .fasync = pipe_read_fasync,
618};
619
Linus Torvaldsa19cbd42006-03-08 14:03:09 -0800620static struct file_operations write_pipe_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700621 .llseek = no_llseek,
622 .read = bad_pipe_r,
623 .write = pipe_write,
624 .writev = pipe_writev,
625 .poll = pipe_poll,
626 .ioctl = pipe_ioctl,
627 .open = pipe_write_open,
628 .release = pipe_write_release,
629 .fasync = pipe_write_fasync,
630};
631
Linus Torvaldsa19cbd42006-03-08 14:03:09 -0800632static struct file_operations rdwr_pipe_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633 .llseek = no_llseek,
634 .read = pipe_read,
635 .readv = pipe_readv,
636 .write = pipe_write,
637 .writev = pipe_writev,
638 .poll = pipe_poll,
639 .ioctl = pipe_ioctl,
640 .open = pipe_rdwr_open,
641 .release = pipe_rdwr_release,
642 .fasync = pipe_rdwr_fasync,
643};
644
645void free_pipe_info(struct inode *inode)
646{
647 int i;
648 struct pipe_inode_info *info = inode->i_pipe;
649
650 inode->i_pipe = NULL;
651 for (i = 0; i < PIPE_BUFFERS; i++) {
652 struct pipe_buffer *buf = info->bufs + i;
653 if (buf->ops)
654 buf->ops->release(info, buf);
655 }
656 if (info->tmp_page)
657 __free_page(info->tmp_page);
658 kfree(info);
659}
660
661struct inode* pipe_new(struct inode* inode)
662{
663 struct pipe_inode_info *info;
664
Oliver Neukum11b0b5a2006-03-25 03:08:13 -0800665 info = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700666 if (!info)
667 goto fail_page;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700668 inode->i_pipe = info;
669
670 init_waitqueue_head(PIPE_WAIT(*inode));
671 PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
672
673 return inode;
674fail_page:
675 return NULL;
676}
677
Eric Dumazetfa3536c2006-03-26 01:37:24 -0800678static struct vfsmount *pipe_mnt __read_mostly;
/*
 * d_delete callback for pipefs dentries; unconditionally returns 1.
 * NOTE(review): a non-zero d_delete result presumably tells the dcache
 * not to keep the unused dentry around -- confirm against the VFS docs.
 */
static int pipefs_delete_dentry(struct dentry *dentry)
{
	const int drop = 1;

	return drop;
}
683static struct dentry_operations pipefs_dentry_operations = {
684 .d_delete = pipefs_delete_dentry,
685};
686
687static struct inode * get_pipe_inode(void)
688{
689 struct inode *inode = new_inode(pipe_mnt->mnt_sb);
690
691 if (!inode)
692 goto fail_inode;
693
694 if(!pipe_new(inode))
695 goto fail_iput;
696 PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
697 inode->i_fop = &rdwr_pipe_fops;
698
699 /*
700 * Mark the inode dirty from the very beginning,
701 * that way it will never be moved to the dirty
702 * list because "mark_inode_dirty()" will think
703 * that it already _is_ on the dirty list.
704 */
705 inode->i_state = I_DIRTY;
706 inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
707 inode->i_uid = current->fsuid;
708 inode->i_gid = current->fsgid;
709 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
710 inode->i_blksize = PAGE_SIZE;
711 return inode;
712
713fail_iput:
714 iput(inode);
715fail_inode:
716 return NULL;
717}
718
719int do_pipe(int *fd)
720{
721 struct qstr this;
722 char name[32];
723 struct dentry *dentry;
724 struct inode * inode;
725 struct file *f1, *f2;
726 int error;
727 int i,j;
728
729 error = -ENFILE;
730 f1 = get_empty_filp();
731 if (!f1)
732 goto no_files;
733
734 f2 = get_empty_filp();
735 if (!f2)
736 goto close_f1;
737
738 inode = get_pipe_inode();
739 if (!inode)
740 goto close_f12;
741
742 error = get_unused_fd();
743 if (error < 0)
744 goto close_f12_inode;
745 i = error;
746
747 error = get_unused_fd();
748 if (error < 0)
749 goto close_f12_inode_i;
750 j = error;
751
752 error = -ENOMEM;
753 sprintf(name, "[%lu]", inode->i_ino);
754 this.name = name;
755 this.len = strlen(name);
756 this.hash = inode->i_ino; /* will go */
757 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
758 if (!dentry)
759 goto close_f12_inode_i_j;
760 dentry->d_op = &pipefs_dentry_operations;
761 d_add(dentry, inode);
762 f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
763 f1->f_dentry = f2->f_dentry = dget(dentry);
764 f1->f_mapping = f2->f_mapping = inode->i_mapping;
765
766 /* read file */
767 f1->f_pos = f2->f_pos = 0;
768 f1->f_flags = O_RDONLY;
769 f1->f_op = &read_pipe_fops;
770 f1->f_mode = FMODE_READ;
771 f1->f_version = 0;
772
773 /* write file */
774 f2->f_flags = O_WRONLY;
775 f2->f_op = &write_pipe_fops;
776 f2->f_mode = FMODE_WRITE;
777 f2->f_version = 0;
778
779 fd_install(i, f1);
780 fd_install(j, f2);
781 fd[0] = i;
782 fd[1] = j;
783 return 0;
784
785close_f12_inode_i_j:
786 put_unused_fd(j);
787close_f12_inode_i:
788 put_unused_fd(i);
789close_f12_inode:
790 free_pipe_info(inode);
791 iput(inode);
792close_f12:
793 put_filp(f2);
794close_f1:
795 put_filp(f1);
796no_files:
797 return error;
798}
799
800/*
801 * pipefs should _never_ be mounted by userland - too much of security hassle,
802 * no real gain from having the whole whorehouse mounted. So we don't need
803 * any operations on the root directory. However, we need a non-trivial
804 * d_name - pipe: will go nicely and kill the special-casing in procfs.
805 */
806
807static struct super_block *pipefs_get_sb(struct file_system_type *fs_type,
808 int flags, const char *dev_name, void *data)
809{
810 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
811}
812
813static struct file_system_type pipe_fs_type = {
814 .name = "pipefs",
815 .get_sb = pipefs_get_sb,
816 .kill_sb = kill_anon_super,
817};
818
819static int __init init_pipe_fs(void)
820{
821 int err = register_filesystem(&pipe_fs_type);
822 if (!err) {
823 pipe_mnt = kern_mount(&pipe_fs_type);
824 if (IS_ERR(pipe_mnt)) {
825 err = PTR_ERR(pipe_mnt);
826 unregister_filesystem(&pipe_fs_type);
827 }
828 }
829 return err;
830}
831
832static void __exit exit_pipe_fs(void)
833{
834 unregister_filesystem(&pipe_fs_type);
835 mntput(pipe_mnt);
836}
837
/* Hook pipefs set-up and tear-down into the kernel init/exit machinery. */
fs_initcall(init_pipe_fs);
module_exit(exit_pipe_fs);