blob: 2ce19c000d2adb40afc17205edcb937cbb84d730 [file] [log] [blame]
/*
 *  fs/eventfd.c
 *
 *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
 *
 */
7
8#include <linux/file.h>
9#include <linux/poll.h>
10#include <linux/init.h>
11#include <linux/fs.h>
12#include <linux/sched.h>
13#include <linux/kernel.h>
14#include <linux/list.h>
15#include <linux/spinlock.h>
16#include <linux/anon_inodes.h>
17#include <linux/eventfd.h>
18
19struct eventfd_ctx {
Davide Libenzie1ad7462007-05-10 22:23:19 -070020 wait_queue_head_t wqh;
21 /*
22 * Every time that a write(2) is performed on an eventfd, the
23 * value of the __u64 being written is added to "count" and a
24 * wakeup is performed on "wqh". A read(2) will return the "count"
25 * value to userspace, and will reset "count" to zero. The kernel
26 * size eventfd_signal() also, adds to the "count" counter and
27 * issue a wakeup.
28 */
29 __u64 count;
30};
31
32/*
33 * Adds "n" to the eventfd counter "count". Returns "n" in case of
34 * success, or a value lower then "n" in case of coutner overflow.
35 * This function is supposed to be called by the kernel in paths
36 * that do not allow sleeping. In this function we allow the counter
37 * to reach the ULLONG_MAX value, and we signal this as overflow
38 * condition by returining a POLLERR to poll(2).
39 */
40int eventfd_signal(struct file *file, int n)
41{
42 struct eventfd_ctx *ctx = file->private_data;
43 unsigned long flags;
44
45 if (n < 0)
46 return -EINVAL;
Davide Libenzid48eb232007-05-18 12:02:33 -070047 spin_lock_irqsave(&ctx->wqh.lock, flags);
Davide Libenzie1ad7462007-05-10 22:23:19 -070048 if (ULLONG_MAX - ctx->count < n)
49 n = (int) (ULLONG_MAX - ctx->count);
50 ctx->count += n;
51 if (waitqueue_active(&ctx->wqh))
52 wake_up_locked(&ctx->wqh);
Davide Libenzid48eb232007-05-18 12:02:33 -070053 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
Davide Libenzie1ad7462007-05-10 22:23:19 -070054
55 return n;
56}
57
58static int eventfd_release(struct inode *inode, struct file *file)
59{
60 kfree(file->private_data);
61 return 0;
62}
63
64static unsigned int eventfd_poll(struct file *file, poll_table *wait)
65{
66 struct eventfd_ctx *ctx = file->private_data;
67 unsigned int events = 0;
68 unsigned long flags;
69
70 poll_wait(file, &ctx->wqh, wait);
71
Davide Libenzid48eb232007-05-18 12:02:33 -070072 spin_lock_irqsave(&ctx->wqh.lock, flags);
Davide Libenzie1ad7462007-05-10 22:23:19 -070073 if (ctx->count > 0)
74 events |= POLLIN;
75 if (ctx->count == ULLONG_MAX)
76 events |= POLLERR;
77 if (ULLONG_MAX - 1 > ctx->count)
78 events |= POLLOUT;
Davide Libenzid48eb232007-05-18 12:02:33 -070079 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
Davide Libenzie1ad7462007-05-10 22:23:19 -070080
81 return events;
82}
83
84static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
85 loff_t *ppos)
86{
87 struct eventfd_ctx *ctx = file->private_data;
88 ssize_t res;
89 __u64 ucnt;
90 DECLARE_WAITQUEUE(wait, current);
91
92 if (count < sizeof(ucnt))
93 return -EINVAL;
Davide Libenzid48eb232007-05-18 12:02:33 -070094 spin_lock_irq(&ctx->wqh.lock);
Davide Libenzie1ad7462007-05-10 22:23:19 -070095 res = -EAGAIN;
96 ucnt = ctx->count;
97 if (ucnt > 0)
98 res = sizeof(ucnt);
99 else if (!(file->f_flags & O_NONBLOCK)) {
100 __add_wait_queue(&ctx->wqh, &wait);
101 for (res = 0;;) {
102 set_current_state(TASK_INTERRUPTIBLE);
103 if (ctx->count > 0) {
104 ucnt = ctx->count;
105 res = sizeof(ucnt);
106 break;
107 }
108 if (signal_pending(current)) {
109 res = -ERESTARTSYS;
110 break;
111 }
Davide Libenzid48eb232007-05-18 12:02:33 -0700112 spin_unlock_irq(&ctx->wqh.lock);
Davide Libenzie1ad7462007-05-10 22:23:19 -0700113 schedule();
Davide Libenzid48eb232007-05-18 12:02:33 -0700114 spin_lock_irq(&ctx->wqh.lock);
Davide Libenzie1ad7462007-05-10 22:23:19 -0700115 }
116 __remove_wait_queue(&ctx->wqh, &wait);
117 __set_current_state(TASK_RUNNING);
118 }
119 if (res > 0) {
120 ctx->count = 0;
121 if (waitqueue_active(&ctx->wqh))
122 wake_up_locked(&ctx->wqh);
123 }
Davide Libenzid48eb232007-05-18 12:02:33 -0700124 spin_unlock_irq(&ctx->wqh.lock);
Davide Libenzie1ad7462007-05-10 22:23:19 -0700125 if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
126 return -EFAULT;
127
128 return res;
129}
130
131static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
132 loff_t *ppos)
133{
134 struct eventfd_ctx *ctx = file->private_data;
135 ssize_t res;
136 __u64 ucnt;
137 DECLARE_WAITQUEUE(wait, current);
138
139 if (count < sizeof(ucnt))
140 return -EINVAL;
141 if (copy_from_user(&ucnt, buf, sizeof(ucnt)))
142 return -EFAULT;
143 if (ucnt == ULLONG_MAX)
144 return -EINVAL;
Davide Libenzid48eb232007-05-18 12:02:33 -0700145 spin_lock_irq(&ctx->wqh.lock);
Davide Libenzie1ad7462007-05-10 22:23:19 -0700146 res = -EAGAIN;
147 if (ULLONG_MAX - ctx->count > ucnt)
148 res = sizeof(ucnt);
149 else if (!(file->f_flags & O_NONBLOCK)) {
150 __add_wait_queue(&ctx->wqh, &wait);
151 for (res = 0;;) {
152 set_current_state(TASK_INTERRUPTIBLE);
153 if (ULLONG_MAX - ctx->count > ucnt) {
154 res = sizeof(ucnt);
155 break;
156 }
157 if (signal_pending(current)) {
158 res = -ERESTARTSYS;
159 break;
160 }
Davide Libenzid48eb232007-05-18 12:02:33 -0700161 spin_unlock_irq(&ctx->wqh.lock);
Davide Libenzie1ad7462007-05-10 22:23:19 -0700162 schedule();
Davide Libenzid48eb232007-05-18 12:02:33 -0700163 spin_lock_irq(&ctx->wqh.lock);
Davide Libenzie1ad7462007-05-10 22:23:19 -0700164 }
165 __remove_wait_queue(&ctx->wqh, &wait);
166 __set_current_state(TASK_RUNNING);
167 }
168 if (res > 0) {
169 ctx->count += ucnt;
170 if (waitqueue_active(&ctx->wqh))
171 wake_up_locked(&ctx->wqh);
172 }
Davide Libenzid48eb232007-05-18 12:02:33 -0700173 spin_unlock_irq(&ctx->wqh.lock);
Davide Libenzie1ad7462007-05-10 22:23:19 -0700174
175 return res;
176}
177
178static const struct file_operations eventfd_fops = {
179 .release = eventfd_release,
180 .poll = eventfd_poll,
181 .read = eventfd_read,
182 .write = eventfd_write,
183};
184
185struct file *eventfd_fget(int fd)
186{
187 struct file *file;
188
189 file = fget(fd);
190 if (!file)
191 return ERR_PTR(-EBADF);
192 if (file->f_op != &eventfd_fops) {
193 fput(file);
194 return ERR_PTR(-EINVAL);
195 }
196
197 return file;
198}
199
200asmlinkage long sys_eventfd(unsigned int count)
201{
202 int error, fd;
203 struct eventfd_ctx *ctx;
204 struct file *file;
205 struct inode *inode;
206
207 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
208 if (!ctx)
209 return -ENOMEM;
210
211 init_waitqueue_head(&ctx->wqh);
Davide Libenzie1ad7462007-05-10 22:23:19 -0700212 ctx->count = count;
213
214 /*
215 * When we call this, the initialization must be complete, since
216 * anon_inode_getfd() will install the fd.
217 */
218 error = anon_inode_getfd(&fd, &inode, &file, "[eventfd]",
219 &eventfd_fops, ctx);
220 if (!error)
221 return fd;
222
223 kfree(ctx);
224 return error;
225}
226