blob: 5acc001d49f6c72db83216173136701c2169f950 [file] [log] [blame]
Eric W. Biederman77b14db2007-02-14 00:34:12 -08001/*
2 * /proc/sys support
3 */
4
5#include <linux/sysctl.h>
6#include <linux/proc_fs.h>
7#include <linux/security.h>
8#include "internal.h"
9
10static struct dentry_operations proc_sys_dentry_operations;
11static const struct file_operations proc_sys_file_operations;
Jan Engelhardt03a44822008-02-08 04:21:19 -080012static const struct inode_operations proc_sys_inode_operations;
Eric W. Biederman77b14db2007-02-14 00:34:12 -080013
14static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table)
15{
16 /* Refresh the cached information bits in the inode */
17 if (table) {
18 inode->i_uid = 0;
19 inode->i_gid = 0;
20 inode->i_mode = table->mode;
21 if (table->proc_handler) {
22 inode->i_mode |= S_IFREG;
23 inode->i_nlink = 1;
24 } else {
25 inode->i_mode |= S_IFDIR;
26 inode->i_nlink = 0; /* It is too hard to figure out */
27 }
28 }
29}
30
31static struct inode *proc_sys_make_inode(struct inode *dir, struct ctl_table *table)
32{
33 struct inode *inode;
34 struct proc_inode *dir_ei, *ei;
35 int depth;
36
37 inode = new_inode(dir->i_sb);
38 if (!inode)
39 goto out;
40
41 /* A directory is always one deeper than it's parent */
42 dir_ei = PROC_I(dir);
43 depth = dir_ei->fd + 1;
44
45 ei = PROC_I(inode);
46 ei->fd = depth;
47 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
48 inode->i_op = &proc_sys_inode_operations;
49 inode->i_fop = &proc_sys_file_operations;
Eric W. Biederman86a71db2007-02-14 00:34:16 -080050 inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */
Eric W. Biederman77b14db2007-02-14 00:34:12 -080051 proc_sys_refresh_inode(inode, table);
52out:
53 return inode;
54}
55
56static struct dentry *proc_sys_ancestor(struct dentry *dentry, int depth)
57{
58 for (;;) {
59 struct proc_inode *ei;
60
61 ei = PROC_I(dentry->d_inode);
62 if (ei->fd == depth)
63 break; /* found */
64
65 dentry = dentry->d_parent;
66 }
67 return dentry;
68}
69
70static struct ctl_table *proc_sys_lookup_table_one(struct ctl_table *table,
71 struct qstr *name)
72{
73 int len;
74 for ( ; table->ctl_name || table->procname; table++) {
75
76 if (!table->procname)
77 continue;
78
79 len = strlen(table->procname);
80 if (len != name->len)
81 continue;
82
83 if (memcmp(table->procname, name->name, len) != 0)
84 continue;
85
86 /* I have a match */
87 return table;
88 }
89 return NULL;
90}
91
92static struct ctl_table *proc_sys_lookup_table(struct dentry *dentry,
93 struct ctl_table *table)
94{
95 struct dentry *ancestor;
96 struct proc_inode *ei;
97 int depth, i;
98
99 ei = PROC_I(dentry->d_inode);
100 depth = ei->fd;
101
102 if (depth == 0)
103 return table;
104
105 for (i = 1; table && (i <= depth); i++) {
106 ancestor = proc_sys_ancestor(dentry, i);
107 table = proc_sys_lookup_table_one(table, &ancestor->d_name);
108 if (table)
109 table = table->child;
110 }
111 return table;
112
113}
114static struct ctl_table *proc_sys_lookup_entry(struct dentry *dparent,
115 struct qstr *name,
116 struct ctl_table *table)
117{
118 table = proc_sys_lookup_table(dparent, table);
119 if (table)
120 table = proc_sys_lookup_table_one(table, name);
121 return table;
122}
123
124static struct ctl_table *do_proc_sys_lookup(struct dentry *parent,
125 struct qstr *name,
126 struct ctl_table_header **ptr)
127{
128 struct ctl_table_header *head;
129 struct ctl_table *table = NULL;
130
131 for (head = sysctl_head_next(NULL); head;
132 head = sysctl_head_next(head)) {
133 table = proc_sys_lookup_entry(parent, name, head->ctl_table);
134 if (table)
135 break;
136 }
137 *ptr = head;
138 return table;
139}
140
141static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
142 struct nameidata *nd)
143{
144 struct ctl_table_header *head;
145 struct inode *inode;
146 struct dentry *err;
147 struct ctl_table *table;
148
149 err = ERR_PTR(-ENOENT);
150 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
151 if (!table)
152 goto out;
153
154 err = ERR_PTR(-ENOMEM);
155 inode = proc_sys_make_inode(dir, table);
156 if (!inode)
157 goto out;
158
159 err = NULL;
160 dentry->d_op = &proc_sys_dentry_operations;
161 d_add(dentry, inode);
162
163out:
164 sysctl_head_finish(head);
165 return err;
166}
167
Pavel Emelyanov7708bfb2008-04-29 01:02:40 -0700168static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
169 size_t count, loff_t *ppos, int write)
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800170{
171 struct dentry *dentry = filp->f_dentry;
172 struct ctl_table_header *head;
173 struct ctl_table *table;
David Howells2a2da532007-10-25 15:27:40 +0100174 ssize_t error;
175 size_t res;
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800176
177 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
178 /* Has the sysctl entry disappeared on us? */
179 error = -ENOENT;
180 if (!table)
181 goto out;
182
183 /* Has the sysctl entry been replaced by a directory? */
184 error = -EISDIR;
185 if (!table->proc_handler)
186 goto out;
187
188 /*
189 * At this point we know that the sysctl was not unregistered
190 * and won't be until we finish.
191 */
192 error = -EPERM;
Pavel Emelyanovd7321cd2008-04-29 01:02:44 -0700193 if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ))
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800194 goto out;
195
196 /* careful: calling conventions are nasty here */
197 res = count;
Pavel Emelyanov7708bfb2008-04-29 01:02:40 -0700198 error = table->proc_handler(table, write, filp, buf, &res, ppos);
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800199 if (!error)
200 error = res;
201out:
202 sysctl_head_finish(head);
203
204 return error;
205}
206
Pavel Emelyanov7708bfb2008-04-29 01:02:40 -0700207static ssize_t proc_sys_read(struct file *filp, char __user *buf,
208 size_t count, loff_t *ppos)
209{
210 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0);
211}
212
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800213static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
214 size_t count, loff_t *ppos)
215{
Pavel Emelyanov7708bfb2008-04-29 01:02:40 -0700216 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1);
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800217}
218
219
220static int proc_sys_fill_cache(struct file *filp, void *dirent,
221 filldir_t filldir, struct ctl_table *table)
222{
223 struct ctl_table_header *head;
224 struct ctl_table *child_table = NULL;
225 struct dentry *child, *dir = filp->f_path.dentry;
226 struct inode *inode;
227 struct qstr qname;
228 ino_t ino = 0;
229 unsigned type = DT_UNKNOWN;
230 int ret;
231
232 qname.name = table->procname;
233 qname.len = strlen(table->procname);
234 qname.hash = full_name_hash(qname.name, qname.len);
235
236 /* Suppress duplicates.
237 * Only fill a directory entry if it is the value that
238 * an ordinary lookup of that name returns. Hide all
239 * others.
240 *
241 * If we ever cache this translation in the dcache
242 * I should do a dcache lookup first. But for now
243 * it is just simpler not to.
244 */
245 ret = 0;
246 child_table = do_proc_sys_lookup(dir, &qname, &head);
247 sysctl_head_finish(head);
248 if (child_table != table)
249 return 0;
250
251 child = d_lookup(dir, &qname);
252 if (!child) {
253 struct dentry *new;
254 new = d_alloc(dir, &qname);
255 if (new) {
256 inode = proc_sys_make_inode(dir->d_inode, table);
257 if (!inode)
258 child = ERR_PTR(-ENOMEM);
259 else {
260 new->d_op = &proc_sys_dentry_operations;
261 d_add(new, inode);
262 }
263 if (child)
264 dput(new);
265 else
266 child = new;
267 }
268 }
269 if (!child || IS_ERR(child) || !child->d_inode)
270 goto end_instantiate;
271 inode = child->d_inode;
272 if (inode) {
273 ino = inode->i_ino;
274 type = inode->i_mode >> 12;
275 }
276 dput(child);
277end_instantiate:
278 if (!ino)
279 ino= find_inode_number(dir, &qname);
280 if (!ino)
281 ino = 1;
282 return filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
283}
284
285static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
286{
287 struct dentry *dentry = filp->f_dentry;
288 struct inode *inode = dentry->d_inode;
289 struct ctl_table_header *head = NULL;
290 struct ctl_table *table;
291 unsigned long pos;
292 int ret;
293
294 ret = -ENOTDIR;
295 if (!S_ISDIR(inode->i_mode))
296 goto out;
297
298 ret = 0;
299 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */
300 if (filp->f_pos == 0) {
301 if (filldir(dirent, ".", 1, filp->f_pos,
302 inode->i_ino, DT_DIR) < 0)
303 goto out;
304 filp->f_pos++;
305 }
306 if (filp->f_pos == 1) {
307 if (filldir(dirent, "..", 2, filp->f_pos,
308 parent_ino(dentry), DT_DIR) < 0)
309 goto out;
310 filp->f_pos++;
311 }
312 pos = 2;
313
314 /* - Find each instance of the directory
315 * - Read all entries in each instance
316 * - Before returning an entry to user space lookup the entry
317 * by name and if I find a different entry don't return
318 * this one because it means it is a buried dup.
319 * For sysctl this should only happen for directory entries.
320 */
321 for (head = sysctl_head_next(NULL); head; head = sysctl_head_next(head)) {
322 table = proc_sys_lookup_table(dentry, head->ctl_table);
323
324 if (!table)
325 continue;
326
327 for (; table->ctl_name || table->procname; table++, pos++) {
328 /* Can't do anything without a proc name */
329 if (!table->procname)
330 continue;
331
332 if (pos < filp->f_pos)
333 continue;
334
335 if (proc_sys_fill_cache(filp, dirent, filldir, table) < 0)
336 goto out;
337 filp->f_pos = pos + 1;
338 }
339 }
340 ret = 1;
341out:
342 sysctl_head_finish(head);
343 return ret;
344}
345
346static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *nd)
347{
348 /*
349 * sysctl entries that are not writeable,
350 * are _NOT_ writeable, capabilities or not.
351 */
352 struct ctl_table_header *head;
353 struct ctl_table *table;
354 struct dentry *dentry;
355 int mode;
356 int depth;
357 int error;
358
359 head = NULL;
360 depth = PROC_I(inode)->fd;
361
362 /* First check the cached permissions, in case we don't have
363 * enough information to lookup the sysctl table entry.
364 */
365 error = -EACCES;
366 mode = inode->i_mode;
367
368 if (current->euid == 0)
369 mode >>= 6;
370 else if (in_group_p(0))
371 mode >>= 3;
372
373 if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)
374 error = 0;
375
376 /* If we can't get a sysctl table entry the permission
377 * checks on the cached mode will have to be enough.
378 */
379 if (!nd || !depth)
380 goto out;
381
Jan Blunck4ac91372008-02-14 19:34:32 -0800382 dentry = nd->path.dentry;
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800383 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
384
385 /* If the entry does not exist deny permission */
386 error = -EACCES;
387 if (!table)
388 goto out;
389
390 /* Use the permissions on the sysctl table entry */
Pavel Emelyanovd7321cd2008-04-29 01:02:44 -0700391 error = sysctl_perm(head->root, table, mask);
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800392out:
393 sysctl_head_finish(head);
394 return error;
395}
396
397static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
398{
399 struct inode *inode = dentry->d_inode;
400 int error;
401
402 if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
403 return -EPERM;
404
405 error = inode_change_ok(inode, attr);
John Johansen9d0633c2007-05-08 00:29:44 -0700406 if (!error)
407 error = inode_setattr(inode, attr);
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800408
409 return error;
410}
411
412/* I'm lazy and don't distinguish between files and directories,
413 * until access time.
414 */
415static const struct file_operations proc_sys_file_operations = {
416 .read = proc_sys_read,
417 .write = proc_sys_write,
418 .readdir = proc_sys_readdir,
419};
420
Jan Engelhardt03a44822008-02-08 04:21:19 -0800421static const struct inode_operations proc_sys_inode_operations = {
Eric W. Biederman77b14db2007-02-14 00:34:12 -0800422 .lookup = proc_sys_lookup,
423 .permission = proc_sys_permission,
424 .setattr = proc_sys_setattr,
425};
426
427static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
428{
429 struct ctl_table_header *head;
430 struct ctl_table *table;
431 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
432 proc_sys_refresh_inode(dentry->d_inode, table);
433 sysctl_head_finish(head);
434 return !!table;
435}
436
437static struct dentry_operations proc_sys_dentry_operations = {
438 .d_revalidate = proc_sys_revalidate,
439};
440
441static struct proc_dir_entry *proc_sys_root;
442
443int proc_sys_init(void)
444{
445 proc_sys_root = proc_mkdir("sys", NULL);
446 proc_sys_root->proc_iops = &proc_sys_inode_operations;
447 proc_sys_root->proc_fops = &proc_sys_file_operations;
448 proc_sys_root->nlink = 0;
449 return 0;
450}