blob: acf6c1550f2747105500dd55bb2c39709d2e9687 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * sysctl.c: General linux system control interface
3 *
4 * Begun 24 March 1995, Stephen Tweedie
5 * Added /proc support, Dec 1995
6 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9 * Dynamic registration fixes, Stephen Tweedie.
10 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12 * Horn.
13 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16 * Wendling.
17 * The list_for_each() macro wasn't appropriate for the sysctl loop.
18 * Removed it and replaced it with older style, 03/23/00, Bill Wendling
19 */
20
21#include <linux/config.h>
22#include <linux/module.h>
23#include <linux/mm.h>
24#include <linux/swap.h>
25#include <linux/slab.h>
26#include <linux/sysctl.h>
27#include <linux/proc_fs.h>
Randy.Dunlapc59ede72006-01-11 12:17:46 -080028#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070029#include <linux/ctype.h>
30#include <linux/utsname.h>
31#include <linux/capability.h>
32#include <linux/smp_lock.h>
33#include <linux/init.h>
34#include <linux/kernel.h>
Kay Sievers0296b222005-11-11 05:33:52 +010035#include <linux/kobject.h>
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -030036#include <linux/net.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070037#include <linux/sysrq.h>
38#include <linux/highuid.h>
39#include <linux/writeback.h>
40#include <linux/hugetlb.h>
41#include <linux/security.h>
42#include <linux/initrd.h>
43#include <linux/times.h>
44#include <linux/limits.h>
45#include <linux/dcache.h>
46#include <linux/syscalls.h>
Pavel Machekc255d842006-02-20 18:27:58 -080047#include <linux/nfs_fs.h>
48#include <linux/acpi.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070049
50#include <asm/uaccess.h>
51#include <asm/processor.h>
52
Linus Torvalds1da177e2005-04-16 15:20:36 -070053#if defined(CONFIG_SYSCTL)
54
55/* External variables not in a header file. */
56extern int C_A_D;
57extern int sysctl_overcommit_memory;
58extern int sysctl_overcommit_ratio;
59extern int max_threads;
60extern int sysrq_enabled;
61extern int core_uses_pid;
Alan Coxd6e71142005-06-23 00:09:43 -070062extern int suid_dumpable;
Linus Torvalds1da177e2005-04-16 15:20:36 -070063extern char core_pattern[];
64extern int cad_pid;
65extern int pid_max;
66extern int min_free_kbytes;
67extern int printk_ratelimit_jiffies;
68extern int printk_ratelimit_burst;
69extern int pid_max_min, pid_max_max;
Andrew Morton9d0243b2006-01-08 01:00:39 -080070extern int sysctl_drop_caches;
Rohit Seth8ad4b1f2006-01-08 01:00:40 -080071extern int percpu_pagelist_fraction;
Linus Torvalds1da177e2005-04-16 15:20:36 -070072
73#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
74int unknown_nmi_panic;
75extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *,
76 void __user *, size_t *, loff_t *);
77#endif
78
79/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
80static int maxolduid = 65535;
81static int minolduid;
Rohit Seth8ad4b1f2006-01-08 01:00:40 -080082static int min_percpu_pagelist_fract = 8;
Linus Torvalds1da177e2005-04-16 15:20:36 -070083
84static int ngroups_max = NGROUPS_MAX;
85
86#ifdef CONFIG_KMOD
87extern char modprobe_path[];
88#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -070089#ifdef CONFIG_CHR_DEV_SG
90extern int sg_big_buff;
91#endif
92#ifdef CONFIG_SYSVIPC
93extern size_t shm_ctlmax;
94extern size_t shm_ctlall;
95extern int shm_ctlmni;
96extern int msg_ctlmax;
97extern int msg_ctlmnb;
98extern int msg_ctlmni;
99extern int sem_ctls[];
100#endif
101
102#ifdef __sparc__
103extern char reboot_command [];
104extern int stop_a_enabled;
105extern int scons_pwroff;
106#endif
107
108#ifdef __hppa__
109extern int pwrsw_enabled;
110extern int unaligned_enabled;
111#endif
112
Martin Schwidefsky347a8dc2006-01-06 00:19:28 -0800113#ifdef CONFIG_S390
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114#ifdef CONFIG_MATHEMU
115extern int sysctl_ieee_emulation_warnings;
116#endif
117extern int sysctl_userprocess_debug;
Martin Schwidefsky951f22d2005-07-27 11:44:57 -0700118extern int spin_retry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700119#endif
120
121extern int sysctl_hz_timer;
122
123#ifdef CONFIG_BSD_PROCESS_ACCT
124extern int acct_parm[];
125#endif
126
Jes Sorensend2b176e2006-02-28 09:42:23 -0800127#ifdef CONFIG_IA64
128extern int no_unaligned_warning;
129#endif
130
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
132 ctl_table *, void **);
133static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
134 void __user *buffer, size_t *lenp, loff_t *ppos);
135
136static ctl_table root_table[];
137static struct ctl_table_header root_table_header =
138 { root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
139
140static ctl_table kern_table[];
141static ctl_table vm_table[];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142static ctl_table proc_table[];
143static ctl_table fs_table[];
144static ctl_table debug_table[];
145static ctl_table dev_table[];
146extern ctl_table random_table[];
147#ifdef CONFIG_UNIX98_PTYS
148extern ctl_table pty_table[];
149#endif
Robert Love0399cb02005-07-13 12:38:18 -0400150#ifdef CONFIG_INOTIFY
151extern ctl_table inotify_table[];
152#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700153
154#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
155int sysctl_legacy_va_layout;
156#endif
157
158/* /proc declarations: */
159
160#ifdef CONFIG_PROC_FS
161
162static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *);
163static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *);
164static int proc_opensys(struct inode *, struct file *);
165
/*
 * File operations that route open/read/write to the proc_opensys(),
 * proc_readsys() and proc_writesys() helpers prototyped just above.
 */
struct file_operations proc_sys_file_operations = {
	.open		= proc_opensys,
	.read		= proc_readsys,
	.write		= proc_writesys,
};
171
172extern struct proc_dir_entry *proc_sys_root;
173
Al Viro330d57f2005-11-04 10:18:40 +0000174static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175static void unregister_proc_table(ctl_table *, struct proc_dir_entry *);
176#endif
177
178/* The default sysctl tables: */
179
180static ctl_table root_table[] = {
181 {
182 .ctl_name = CTL_KERN,
183 .procname = "kernel",
184 .mode = 0555,
185 .child = kern_table,
186 },
187 {
188 .ctl_name = CTL_VM,
189 .procname = "vm",
190 .mode = 0555,
191 .child = vm_table,
192 },
193#ifdef CONFIG_NET
194 {
195 .ctl_name = CTL_NET,
196 .procname = "net",
197 .mode = 0555,
198 .child = net_table,
199 },
200#endif
201 {
202 .ctl_name = CTL_PROC,
203 .procname = "proc",
204 .mode = 0555,
205 .child = proc_table,
206 },
207 {
208 .ctl_name = CTL_FS,
209 .procname = "fs",
210 .mode = 0555,
211 .child = fs_table,
212 },
213 {
214 .ctl_name = CTL_DEBUG,
215 .procname = "debug",
216 .mode = 0555,
217 .child = debug_table,
218 },
219 {
220 .ctl_name = CTL_DEV,
221 .procname = "dev",
222 .mode = 0555,
223 .child = dev_table,
224 },
Robert Love0eeca282005-07-12 17:06:03 -0400225
Linus Torvalds1da177e2005-04-16 15:20:36 -0700226 { .ctl_name = 0 }
227};
228
229static ctl_table kern_table[] = {
230 {
231 .ctl_name = KERN_OSTYPE,
232 .procname = "ostype",
233 .data = system_utsname.sysname,
234 .maxlen = sizeof(system_utsname.sysname),
235 .mode = 0444,
236 .proc_handler = &proc_doutsstring,
237 .strategy = &sysctl_string,
238 },
239 {
240 .ctl_name = KERN_OSRELEASE,
241 .procname = "osrelease",
242 .data = system_utsname.release,
243 .maxlen = sizeof(system_utsname.release),
244 .mode = 0444,
245 .proc_handler = &proc_doutsstring,
246 .strategy = &sysctl_string,
247 },
248 {
249 .ctl_name = KERN_VERSION,
250 .procname = "version",
251 .data = system_utsname.version,
252 .maxlen = sizeof(system_utsname.version),
253 .mode = 0444,
254 .proc_handler = &proc_doutsstring,
255 .strategy = &sysctl_string,
256 },
257 {
258 .ctl_name = KERN_NODENAME,
259 .procname = "hostname",
260 .data = system_utsname.nodename,
261 .maxlen = sizeof(system_utsname.nodename),
262 .mode = 0644,
263 .proc_handler = &proc_doutsstring,
264 .strategy = &sysctl_string,
265 },
266 {
267 .ctl_name = KERN_DOMAINNAME,
268 .procname = "domainname",
269 .data = system_utsname.domainname,
270 .maxlen = sizeof(system_utsname.domainname),
271 .mode = 0644,
272 .proc_handler = &proc_doutsstring,
273 .strategy = &sysctl_string,
274 },
275 {
276 .ctl_name = KERN_PANIC,
277 .procname = "panic",
278 .data = &panic_timeout,
279 .maxlen = sizeof(int),
280 .mode = 0644,
281 .proc_handler = &proc_dointvec,
282 },
283 {
284 .ctl_name = KERN_CORE_USES_PID,
285 .procname = "core_uses_pid",
286 .data = &core_uses_pid,
287 .maxlen = sizeof(int),
288 .mode = 0644,
289 .proc_handler = &proc_dointvec,
290 },
291 {
292 .ctl_name = KERN_CORE_PATTERN,
293 .procname = "core_pattern",
294 .data = core_pattern,
295 .maxlen = 64,
296 .mode = 0644,
297 .proc_handler = &proc_dostring,
298 .strategy = &sysctl_string,
299 },
300 {
301 .ctl_name = KERN_TAINTED,
302 .procname = "tainted",
303 .data = &tainted,
304 .maxlen = sizeof(int),
305 .mode = 0444,
306 .proc_handler = &proc_dointvec,
307 },
308 {
309 .ctl_name = KERN_CAP_BSET,
310 .procname = "cap-bound",
311 .data = &cap_bset,
312 .maxlen = sizeof(kernel_cap_t),
313 .mode = 0600,
314 .proc_handler = &proc_dointvec_bset,
315 },
316#ifdef CONFIG_BLK_DEV_INITRD
317 {
318 .ctl_name = KERN_REALROOTDEV,
319 .procname = "real-root-dev",
320 .data = &real_root_dev,
321 .maxlen = sizeof(int),
322 .mode = 0644,
323 .proc_handler = &proc_dointvec,
324 },
325#endif
326#ifdef __sparc__
327 {
328 .ctl_name = KERN_SPARC_REBOOT,
329 .procname = "reboot-cmd",
330 .data = reboot_command,
331 .maxlen = 256,
332 .mode = 0644,
333 .proc_handler = &proc_dostring,
334 .strategy = &sysctl_string,
335 },
336 {
337 .ctl_name = KERN_SPARC_STOP_A,
338 .procname = "stop-a",
339 .data = &stop_a_enabled,
340 .maxlen = sizeof (int),
341 .mode = 0644,
342 .proc_handler = &proc_dointvec,
343 },
344 {
345 .ctl_name = KERN_SPARC_SCONS_PWROFF,
346 .procname = "scons-poweroff",
347 .data = &scons_pwroff,
348 .maxlen = sizeof (int),
349 .mode = 0644,
350 .proc_handler = &proc_dointvec,
351 },
352#endif
353#ifdef __hppa__
354 {
355 .ctl_name = KERN_HPPA_PWRSW,
356 .procname = "soft-power",
357 .data = &pwrsw_enabled,
358 .maxlen = sizeof (int),
359 .mode = 0644,
360 .proc_handler = &proc_dointvec,
361 },
362 {
363 .ctl_name = KERN_HPPA_UNALIGNED,
364 .procname = "unaligned-trap",
365 .data = &unaligned_enabled,
366 .maxlen = sizeof (int),
367 .mode = 0644,
368 .proc_handler = &proc_dointvec,
369 },
370#endif
371 {
372 .ctl_name = KERN_CTLALTDEL,
373 .procname = "ctrl-alt-del",
374 .data = &C_A_D,
375 .maxlen = sizeof(int),
376 .mode = 0644,
377 .proc_handler = &proc_dointvec,
378 },
379 {
380 .ctl_name = KERN_PRINTK,
381 .procname = "printk",
382 .data = &console_loglevel,
383 .maxlen = 4*sizeof(int),
384 .mode = 0644,
385 .proc_handler = &proc_dointvec,
386 },
387#ifdef CONFIG_KMOD
388 {
389 .ctl_name = KERN_MODPROBE,
390 .procname = "modprobe",
391 .data = &modprobe_path,
392 .maxlen = KMOD_PATH_LEN,
393 .mode = 0644,
394 .proc_handler = &proc_dostring,
395 .strategy = &sysctl_string,
396 },
397#endif
398#ifdef CONFIG_HOTPLUG
399 {
400 .ctl_name = KERN_HOTPLUG,
401 .procname = "hotplug",
Kay Sievers312c0042005-11-16 09:00:00 +0100402 .data = &uevent_helper,
403 .maxlen = UEVENT_HELPER_PATH_LEN,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404 .mode = 0644,
405 .proc_handler = &proc_dostring,
406 .strategy = &sysctl_string,
407 },
408#endif
409#ifdef CONFIG_CHR_DEV_SG
410 {
411 .ctl_name = KERN_SG_BIG_BUFF,
412 .procname = "sg-big-buff",
413 .data = &sg_big_buff,
414 .maxlen = sizeof (int),
415 .mode = 0444,
416 .proc_handler = &proc_dointvec,
417 },
418#endif
419#ifdef CONFIG_BSD_PROCESS_ACCT
420 {
421 .ctl_name = KERN_ACCT,
422 .procname = "acct",
423 .data = &acct_parm,
424 .maxlen = 3*sizeof(int),
425 .mode = 0644,
426 .proc_handler = &proc_dointvec,
427 },
428#endif
429#ifdef CONFIG_SYSVIPC
430 {
431 .ctl_name = KERN_SHMMAX,
432 .procname = "shmmax",
433 .data = &shm_ctlmax,
434 .maxlen = sizeof (size_t),
435 .mode = 0644,
436 .proc_handler = &proc_doulongvec_minmax,
437 },
438 {
439 .ctl_name = KERN_SHMALL,
440 .procname = "shmall",
441 .data = &shm_ctlall,
442 .maxlen = sizeof (size_t),
443 .mode = 0644,
444 .proc_handler = &proc_doulongvec_minmax,
445 },
446 {
447 .ctl_name = KERN_SHMMNI,
448 .procname = "shmmni",
449 .data = &shm_ctlmni,
450 .maxlen = sizeof (int),
451 .mode = 0644,
452 .proc_handler = &proc_dointvec,
453 },
454 {
455 .ctl_name = KERN_MSGMAX,
456 .procname = "msgmax",
457 .data = &msg_ctlmax,
458 .maxlen = sizeof (int),
459 .mode = 0644,
460 .proc_handler = &proc_dointvec,
461 },
462 {
463 .ctl_name = KERN_MSGMNI,
464 .procname = "msgmni",
465 .data = &msg_ctlmni,
466 .maxlen = sizeof (int),
467 .mode = 0644,
468 .proc_handler = &proc_dointvec,
469 },
470 {
471 .ctl_name = KERN_MSGMNB,
472 .procname = "msgmnb",
473 .data = &msg_ctlmnb,
474 .maxlen = sizeof (int),
475 .mode = 0644,
476 .proc_handler = &proc_dointvec,
477 },
478 {
479 .ctl_name = KERN_SEM,
480 .procname = "sem",
481 .data = &sem_ctls,
482 .maxlen = 4*sizeof (int),
483 .mode = 0644,
484 .proc_handler = &proc_dointvec,
485 },
486#endif
487#ifdef CONFIG_MAGIC_SYSRQ
488 {
489 .ctl_name = KERN_SYSRQ,
490 .procname = "sysrq",
491 .data = &sysrq_enabled,
492 .maxlen = sizeof (int),
493 .mode = 0644,
494 .proc_handler = &proc_dointvec,
495 },
496#endif
497 {
498 .ctl_name = KERN_CADPID,
499 .procname = "cad_pid",
500 .data = &cad_pid,
501 .maxlen = sizeof (int),
502 .mode = 0600,
503 .proc_handler = &proc_dointvec,
504 },
505 {
506 .ctl_name = KERN_MAX_THREADS,
507 .procname = "threads-max",
508 .data = &max_threads,
509 .maxlen = sizeof(int),
510 .mode = 0644,
511 .proc_handler = &proc_dointvec,
512 },
513 {
514 .ctl_name = KERN_RANDOM,
515 .procname = "random",
516 .mode = 0555,
517 .child = random_table,
518 },
519#ifdef CONFIG_UNIX98_PTYS
520 {
521 .ctl_name = KERN_PTY,
522 .procname = "pty",
523 .mode = 0555,
524 .child = pty_table,
525 },
526#endif
527 {
528 .ctl_name = KERN_OVERFLOWUID,
529 .procname = "overflowuid",
530 .data = &overflowuid,
531 .maxlen = sizeof(int),
532 .mode = 0644,
533 .proc_handler = &proc_dointvec_minmax,
534 .strategy = &sysctl_intvec,
535 .extra1 = &minolduid,
536 .extra2 = &maxolduid,
537 },
538 {
539 .ctl_name = KERN_OVERFLOWGID,
540 .procname = "overflowgid",
541 .data = &overflowgid,
542 .maxlen = sizeof(int),
543 .mode = 0644,
544 .proc_handler = &proc_dointvec_minmax,
545 .strategy = &sysctl_intvec,
546 .extra1 = &minolduid,
547 .extra2 = &maxolduid,
548 },
Martin Schwidefsky347a8dc2006-01-06 00:19:28 -0800549#ifdef CONFIG_S390
Linus Torvalds1da177e2005-04-16 15:20:36 -0700550#ifdef CONFIG_MATHEMU
551 {
552 .ctl_name = KERN_IEEE_EMULATION_WARNINGS,
553 .procname = "ieee_emulation_warnings",
554 .data = &sysctl_ieee_emulation_warnings,
555 .maxlen = sizeof(int),
556 .mode = 0644,
557 .proc_handler = &proc_dointvec,
558 },
559#endif
560#ifdef CONFIG_NO_IDLE_HZ
561 {
562 .ctl_name = KERN_HZ_TIMER,
563 .procname = "hz_timer",
564 .data = &sysctl_hz_timer,
565 .maxlen = sizeof(int),
566 .mode = 0644,
567 .proc_handler = &proc_dointvec,
568 },
569#endif
570 {
571 .ctl_name = KERN_S390_USER_DEBUG_LOGGING,
572 .procname = "userprocess_debug",
573 .data = &sysctl_userprocess_debug,
574 .maxlen = sizeof(int),
575 .mode = 0644,
576 .proc_handler = &proc_dointvec,
577 },
578#endif
579 {
580 .ctl_name = KERN_PIDMAX,
581 .procname = "pid_max",
582 .data = &pid_max,
583 .maxlen = sizeof (int),
584 .mode = 0644,
585 .proc_handler = &proc_dointvec_minmax,
586 .strategy = sysctl_intvec,
587 .extra1 = &pid_max_min,
588 .extra2 = &pid_max_max,
589 },
590 {
591 .ctl_name = KERN_PANIC_ON_OOPS,
592 .procname = "panic_on_oops",
593 .data = &panic_on_oops,
594 .maxlen = sizeof(int),
595 .mode = 0644,
596 .proc_handler = &proc_dointvec,
597 },
598 {
599 .ctl_name = KERN_PRINTK_RATELIMIT,
600 .procname = "printk_ratelimit",
601 .data = &printk_ratelimit_jiffies,
602 .maxlen = sizeof(int),
603 .mode = 0644,
604 .proc_handler = &proc_dointvec_jiffies,
605 .strategy = &sysctl_jiffies,
606 },
607 {
608 .ctl_name = KERN_PRINTK_RATELIMIT_BURST,
609 .procname = "printk_ratelimit_burst",
610 .data = &printk_ratelimit_burst,
611 .maxlen = sizeof(int),
612 .mode = 0644,
613 .proc_handler = &proc_dointvec,
614 },
615 {
616 .ctl_name = KERN_NGROUPS_MAX,
617 .procname = "ngroups_max",
618 .data = &ngroups_max,
619 .maxlen = sizeof (int),
620 .mode = 0444,
621 .proc_handler = &proc_dointvec,
622 },
623#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
624 {
625 .ctl_name = KERN_UNKNOWN_NMI_PANIC,
626 .procname = "unknown_nmi_panic",
627 .data = &unknown_nmi_panic,
628 .maxlen = sizeof (int),
629 .mode = 0644,
630 .proc_handler = &proc_unknown_nmi_panic,
631 },
632#endif
633#if defined(CONFIG_X86)
634 {
635 .ctl_name = KERN_BOOTLOADER_TYPE,
636 .procname = "bootloader_type",
637 .data = &bootloader_type,
638 .maxlen = sizeof (int),
639 .mode = 0444,
640 .proc_handler = &proc_dointvec,
641 },
642#endif
Luke Yang7a9166e2006-02-20 18:28:07 -0800643#if defined(CONFIG_MMU)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644 {
645 .ctl_name = KERN_RANDOMIZE,
646 .procname = "randomize_va_space",
647 .data = &randomize_va_space,
648 .maxlen = sizeof(int),
649 .mode = 0644,
650 .proc_handler = &proc_dointvec,
651 },
Luke Yang7a9166e2006-02-20 18:28:07 -0800652#endif
Martin Schwidefsky0152fb32006-01-14 13:21:00 -0800653#if defined(CONFIG_S390) && defined(CONFIG_SMP)
Martin Schwidefsky951f22d2005-07-27 11:44:57 -0700654 {
655 .ctl_name = KERN_SPIN_RETRY,
656 .procname = "spin_retry",
657 .data = &spin_retry,
658 .maxlen = sizeof (int),
659 .mode = 0644,
660 .proc_handler = &proc_dointvec,
661 },
662#endif
#ifdef CONFIG_ACPI_SLEEP
	{
		.ctl_name	= KERN_ACPI_VIDEO_FLAGS,
		.procname	= "acpi_video_flags",
		.data		= &acpi_video_flags,
		.maxlen		= sizeof (unsigned long),
		.mode		= 0644,
		/*
		 * The storage is sized as one unsigned long (see .maxlen),
		 * so it must go through the unsigned long handler.  With
		 * proc_dointvec the same bytes are treated as a vector of
		 * ints, which on 64-bit shows two values and lets a write
		 * fill only half of the variable.
		 */
		.proc_handler	= &proc_doulongvec_minmax,
	},
#endif
Jes Sorensend2b176e2006-02-28 09:42:23 -0800673#ifdef CONFIG_IA64
674 {
675 .ctl_name = KERN_IA64_UNALIGNED,
676 .procname = "ignore-unaligned-usertrap",
677 .data = &no_unaligned_warning,
678 .maxlen = sizeof (int),
679 .mode = 0644,
680 .proc_handler = &proc_dointvec,
681 },
682#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683 { .ctl_name = 0 }
684};
685
686/* Constants for minimum and maximum testing in vm_table.
687 We use these as one-element integer vectors. */
688static int zero;
689static int one_hundred = 100;
690
691
692static ctl_table vm_table[] = {
693 {
694 .ctl_name = VM_OVERCOMMIT_MEMORY,
695 .procname = "overcommit_memory",
696 .data = &sysctl_overcommit_memory,
697 .maxlen = sizeof(sysctl_overcommit_memory),
698 .mode = 0644,
699 .proc_handler = &proc_dointvec,
700 },
701 {
702 .ctl_name = VM_OVERCOMMIT_RATIO,
703 .procname = "overcommit_ratio",
704 .data = &sysctl_overcommit_ratio,
705 .maxlen = sizeof(sysctl_overcommit_ratio),
706 .mode = 0644,
707 .proc_handler = &proc_dointvec,
708 },
709 {
710 .ctl_name = VM_PAGE_CLUSTER,
711 .procname = "page-cluster",
712 .data = &page_cluster,
713 .maxlen = sizeof(int),
714 .mode = 0644,
715 .proc_handler = &proc_dointvec,
716 },
717 {
718 .ctl_name = VM_DIRTY_BACKGROUND,
719 .procname = "dirty_background_ratio",
720 .data = &dirty_background_ratio,
721 .maxlen = sizeof(dirty_background_ratio),
722 .mode = 0644,
723 .proc_handler = &proc_dointvec_minmax,
724 .strategy = &sysctl_intvec,
725 .extra1 = &zero,
726 .extra2 = &one_hundred,
727 },
728 {
729 .ctl_name = VM_DIRTY_RATIO,
730 .procname = "dirty_ratio",
731 .data = &vm_dirty_ratio,
732 .maxlen = sizeof(vm_dirty_ratio),
733 .mode = 0644,
734 .proc_handler = &proc_dointvec_minmax,
735 .strategy = &sysctl_intvec,
736 .extra1 = &zero,
737 .extra2 = &one_hundred,
738 },
739 {
740 .ctl_name = VM_DIRTY_WB_CS,
741 .procname = "dirty_writeback_centisecs",
742 .data = &dirty_writeback_centisecs,
743 .maxlen = sizeof(dirty_writeback_centisecs),
744 .mode = 0644,
745 .proc_handler = &dirty_writeback_centisecs_handler,
746 },
747 {
748 .ctl_name = VM_DIRTY_EXPIRE_CS,
749 .procname = "dirty_expire_centisecs",
750 .data = &dirty_expire_centisecs,
751 .maxlen = sizeof(dirty_expire_centisecs),
752 .mode = 0644,
753 .proc_handler = &proc_dointvec,
754 },
755 {
756 .ctl_name = VM_NR_PDFLUSH_THREADS,
757 .procname = "nr_pdflush_threads",
758 .data = &nr_pdflush_threads,
759 .maxlen = sizeof nr_pdflush_threads,
760 .mode = 0444 /* read-only*/,
761 .proc_handler = &proc_dointvec,
762 },
763 {
764 .ctl_name = VM_SWAPPINESS,
765 .procname = "swappiness",
766 .data = &vm_swappiness,
767 .maxlen = sizeof(vm_swappiness),
768 .mode = 0644,
769 .proc_handler = &proc_dointvec_minmax,
770 .strategy = &sysctl_intvec,
771 .extra1 = &zero,
772 .extra2 = &one_hundred,
773 },
774#ifdef CONFIG_HUGETLB_PAGE
775 {
776 .ctl_name = VM_HUGETLB_PAGES,
777 .procname = "nr_hugepages",
778 .data = &max_huge_pages,
779 .maxlen = sizeof(unsigned long),
780 .mode = 0644,
781 .proc_handler = &hugetlb_sysctl_handler,
782 .extra1 = (void *)&hugetlb_zero,
783 .extra2 = (void *)&hugetlb_infinity,
784 },
785 {
786 .ctl_name = VM_HUGETLB_GROUP,
787 .procname = "hugetlb_shm_group",
788 .data = &sysctl_hugetlb_shm_group,
789 .maxlen = sizeof(gid_t),
790 .mode = 0644,
791 .proc_handler = &proc_dointvec,
792 },
793#endif
794 {
795 .ctl_name = VM_LOWMEM_RESERVE_RATIO,
796 .procname = "lowmem_reserve_ratio",
797 .data = &sysctl_lowmem_reserve_ratio,
798 .maxlen = sizeof(sysctl_lowmem_reserve_ratio),
799 .mode = 0644,
800 .proc_handler = &lowmem_reserve_ratio_sysctl_handler,
801 .strategy = &sysctl_intvec,
802 },
803 {
Andrew Morton9d0243b2006-01-08 01:00:39 -0800804 .ctl_name = VM_DROP_PAGECACHE,
805 .procname = "drop_caches",
806 .data = &sysctl_drop_caches,
807 .maxlen = sizeof(int),
808 .mode = 0644,
809 .proc_handler = drop_caches_sysctl_handler,
810 .strategy = &sysctl_intvec,
811 },
812 {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700813 .ctl_name = VM_MIN_FREE_KBYTES,
814 .procname = "min_free_kbytes",
815 .data = &min_free_kbytes,
816 .maxlen = sizeof(min_free_kbytes),
817 .mode = 0644,
818 .proc_handler = &min_free_kbytes_sysctl_handler,
819 .strategy = &sysctl_intvec,
820 .extra1 = &zero,
821 },
Rohit Seth8ad4b1f2006-01-08 01:00:40 -0800822 {
823 .ctl_name = VM_PERCPU_PAGELIST_FRACTION,
824 .procname = "percpu_pagelist_fraction",
825 .data = &percpu_pagelist_fraction,
826 .maxlen = sizeof(percpu_pagelist_fraction),
827 .mode = 0644,
828 .proc_handler = &percpu_pagelist_fraction_sysctl_handler,
829 .strategy = &sysctl_intvec,
830 .extra1 = &min_percpu_pagelist_fract,
831 },
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832#ifdef CONFIG_MMU
833 {
834 .ctl_name = VM_MAX_MAP_COUNT,
835 .procname = "max_map_count",
836 .data = &sysctl_max_map_count,
837 .maxlen = sizeof(sysctl_max_map_count),
838 .mode = 0644,
839 .proc_handler = &proc_dointvec
840 },
841#endif
842 {
843 .ctl_name = VM_LAPTOP_MODE,
844 .procname = "laptop_mode",
845 .data = &laptop_mode,
846 .maxlen = sizeof(laptop_mode),
847 .mode = 0644,
848 .proc_handler = &proc_dointvec,
849 .strategy = &sysctl_intvec,
850 .extra1 = &zero,
851 },
852 {
853 .ctl_name = VM_BLOCK_DUMP,
854 .procname = "block_dump",
855 .data = &block_dump,
856 .maxlen = sizeof(block_dump),
857 .mode = 0644,
858 .proc_handler = &proc_dointvec,
859 .strategy = &sysctl_intvec,
860 .extra1 = &zero,
861 },
862 {
863 .ctl_name = VM_VFS_CACHE_PRESSURE,
864 .procname = "vfs_cache_pressure",
865 .data = &sysctl_vfs_cache_pressure,
866 .maxlen = sizeof(sysctl_vfs_cache_pressure),
867 .mode = 0644,
868 .proc_handler = &proc_dointvec,
869 .strategy = &sysctl_intvec,
870 .extra1 = &zero,
871 },
872#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
873 {
874 .ctl_name = VM_LEGACY_VA_LAYOUT,
875 .procname = "legacy_va_layout",
876 .data = &sysctl_legacy_va_layout,
877 .maxlen = sizeof(sysctl_legacy_va_layout),
878 .mode = 0644,
879 .proc_handler = &proc_dointvec,
880 .strategy = &sysctl_intvec,
881 .extra1 = &zero,
882 },
883#endif
884#ifdef CONFIG_SWAP
885 {
886 .ctl_name = VM_SWAP_TOKEN_TIMEOUT,
887 .procname = "swap_token_timeout",
888 .data = &swap_token_default_timeout,
889 .maxlen = sizeof(swap_token_default_timeout),
890 .mode = 0644,
891 .proc_handler = &proc_dointvec_jiffies,
892 .strategy = &sysctl_jiffies,
893 },
894#endif
Christoph Lameter17436602006-01-18 17:42:32 -0800895#ifdef CONFIG_NUMA
896 {
897 .ctl_name = VM_ZONE_RECLAIM_MODE,
898 .procname = "zone_reclaim_mode",
899 .data = &zone_reclaim_mode,
900 .maxlen = sizeof(zone_reclaim_mode),
901 .mode = 0644,
902 .proc_handler = &proc_dointvec,
Christoph Lameterc84db232006-02-01 03:05:29 -0800903 .strategy = &sysctl_intvec,
904 .extra1 = &zero,
Christoph Lameter17436602006-01-18 17:42:32 -0800905 },
Christoph Lameter2a11ff02006-02-01 03:05:33 -0800906 {
907 .ctl_name = VM_ZONE_RECLAIM_INTERVAL,
908 .procname = "zone_reclaim_interval",
909 .data = &zone_reclaim_interval,
910 .maxlen = sizeof(zone_reclaim_interval),
911 .mode = 0644,
912 .proc_handler = &proc_dointvec_jiffies,
913 .strategy = &sysctl_jiffies,
914 },
Christoph Lameter17436602006-01-18 17:42:32 -0800915#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700916 { .ctl_name = 0 }
917};
918
/*
 * Child table of the "proc" entry in root_table.  It holds nothing but
 * the zero ctl_name terminator.
 */
static ctl_table proc_table[] = {
	{ .ctl_name = 0 }
};
922
923static ctl_table fs_table[] = {
924 {
925 .ctl_name = FS_NRINODE,
926 .procname = "inode-nr",
927 .data = &inodes_stat,
928 .maxlen = 2*sizeof(int),
929 .mode = 0444,
930 .proc_handler = &proc_dointvec,
931 },
932 {
933 .ctl_name = FS_STATINODE,
934 .procname = "inode-state",
935 .data = &inodes_stat,
936 .maxlen = 7*sizeof(int),
937 .mode = 0444,
938 .proc_handler = &proc_dointvec,
939 },
940 {
941 .ctl_name = FS_NRFILE,
942 .procname = "file-nr",
943 .data = &files_stat,
944 .maxlen = 3*sizeof(int),
945 .mode = 0444,
946 .proc_handler = &proc_dointvec,
947 },
948 {
949 .ctl_name = FS_MAXFILE,
950 .procname = "file-max",
951 .data = &files_stat.max_files,
952 .maxlen = sizeof(int),
953 .mode = 0644,
954 .proc_handler = &proc_dointvec,
955 },
956 {
957 .ctl_name = FS_DENTRY,
958 .procname = "dentry-state",
959 .data = &dentry_stat,
960 .maxlen = 6*sizeof(int),
961 .mode = 0444,
962 .proc_handler = &proc_dointvec,
963 },
964 {
965 .ctl_name = FS_OVERFLOWUID,
966 .procname = "overflowuid",
967 .data = &fs_overflowuid,
968 .maxlen = sizeof(int),
969 .mode = 0644,
970 .proc_handler = &proc_dointvec_minmax,
971 .strategy = &sysctl_intvec,
972 .extra1 = &minolduid,
973 .extra2 = &maxolduid,
974 },
975 {
976 .ctl_name = FS_OVERFLOWGID,
977 .procname = "overflowgid",
978 .data = &fs_overflowgid,
979 .maxlen = sizeof(int),
980 .mode = 0644,
981 .proc_handler = &proc_dointvec_minmax,
982 .strategy = &sysctl_intvec,
983 .extra1 = &minolduid,
984 .extra2 = &maxolduid,
985 },
986 {
987 .ctl_name = FS_LEASES,
988 .procname = "leases-enable",
989 .data = &leases_enable,
990 .maxlen = sizeof(int),
991 .mode = 0644,
992 .proc_handler = &proc_dointvec,
993 },
994#ifdef CONFIG_DNOTIFY
995 {
996 .ctl_name = FS_DIR_NOTIFY,
997 .procname = "dir-notify-enable",
998 .data = &dir_notify_enable,
999 .maxlen = sizeof(int),
1000 .mode = 0644,
1001 .proc_handler = &proc_dointvec,
1002 },
1003#endif
1004#ifdef CONFIG_MMU
1005 {
1006 .ctl_name = FS_LEASE_TIME,
1007 .procname = "lease-break-time",
1008 .data = &lease_break_time,
1009 .maxlen = sizeof(int),
1010 .mode = 0644,
1011 .proc_handler = &proc_dointvec,
1012 },
1013 {
1014 .ctl_name = FS_AIO_NR,
1015 .procname = "aio-nr",
1016 .data = &aio_nr,
1017 .maxlen = sizeof(aio_nr),
1018 .mode = 0444,
Zach Brownd55b5fd2005-11-07 00:59:31 -08001019 .proc_handler = &proc_doulongvec_minmax,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020 },
1021 {
1022 .ctl_name = FS_AIO_MAX_NR,
1023 .procname = "aio-max-nr",
1024 .data = &aio_max_nr,
1025 .maxlen = sizeof(aio_max_nr),
1026 .mode = 0644,
Zach Brownd55b5fd2005-11-07 00:59:31 -08001027 .proc_handler = &proc_doulongvec_minmax,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001028 },
Robert Love0399cb02005-07-13 12:38:18 -04001029#ifdef CONFIG_INOTIFY
1030 {
1031 .ctl_name = FS_INOTIFY,
1032 .procname = "inotify",
1033 .mode = 0555,
1034 .child = inotify_table,
1035 },
1036#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037#endif
Alan Coxd6e71142005-06-23 00:09:43 -07001038 {
1039 .ctl_name = KERN_SETUID_DUMPABLE,
1040 .procname = "suid_dumpable",
1041 .data = &suid_dumpable,
1042 .maxlen = sizeof(int),
1043 .mode = 0644,
1044 .proc_handler = &proc_dointvec,
1045 },
Linus Torvalds1da177e2005-04-16 15:20:36 -07001046 { .ctl_name = 0 }
1047};
1048
/*
 * Child table of the "debug" entry in root_table.  It holds nothing but
 * the zero ctl_name terminator.
 */
static ctl_table debug_table[] = {
	{ .ctl_name = 0 }
};
1052
/*
 * Child table of the "dev" entry in root_table.  It holds nothing but
 * the zero ctl_name terminator.
 */
static ctl_table dev_table[] = {
	{ .ctl_name = 0 }
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056
1057extern void init_irq_proc (void);
1058
Al Viro330d57f2005-11-04 10:18:40 +00001059static DEFINE_SPINLOCK(sysctl_lock);
1060
1061/* called under sysctl_lock */
1062static int use_table(struct ctl_table_header *p)
1063{
1064 if (unlikely(p->unregistering))
1065 return 0;
1066 p->used++;
1067 return 1;
1068}
1069
1070/* called under sysctl_lock */
1071static void unuse_table(struct ctl_table_header *p)
1072{
1073 if (!--p->used)
1074 if (unlikely(p->unregistering))
1075 complete(p->unregistering);
1076}
1077
1078/* called under sysctl_lock, will reacquire if has to wait */
1079static void start_unregistering(struct ctl_table_header *p)
1080{
1081 /*
1082 * if p->used is 0, nobody will ever touch that entry again;
1083 * we'll eliminate all paths to it before dropping sysctl_lock
1084 */
1085 if (unlikely(p->used)) {
1086 struct completion wait;
1087 init_completion(&wait);
1088 p->unregistering = &wait;
1089 spin_unlock(&sysctl_lock);
1090 wait_for_completion(&wait);
1091 spin_lock(&sysctl_lock);
1092 }
1093 /*
1094 * do not remove from the list until nobody holds it; walking the
1095 * list in do_sysctl() relies on that.
1096 */
1097 list_del_init(&p->ctl_entry);
1098}
1099
Linus Torvalds1da177e2005-04-16 15:20:36 -07001100void __init sysctl_init(void)
1101{
1102#ifdef CONFIG_PROC_FS
Al Viro330d57f2005-11-04 10:18:40 +00001103 register_proc_table(root_table, proc_sys_root, &root_table_header);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104 init_irq_proc();
1105#endif
1106}
1107
1108int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1109 void __user *newval, size_t newlen)
1110{
1111 struct list_head *tmp;
Al Viro330d57f2005-11-04 10:18:40 +00001112 int error = -ENOTDIR;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001113
1114 if (nlen <= 0 || nlen >= CTL_MAXNAME)
1115 return -ENOTDIR;
1116 if (oldval) {
1117 int old_len;
1118 if (!oldlenp || get_user(old_len, oldlenp))
1119 return -EFAULT;
1120 }
Al Viro330d57f2005-11-04 10:18:40 +00001121 spin_lock(&sysctl_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001122 tmp = &root_table_header.ctl_entry;
1123 do {
1124 struct ctl_table_header *head =
1125 list_entry(tmp, struct ctl_table_header, ctl_entry);
1126 void *context = NULL;
Al Viro330d57f2005-11-04 10:18:40 +00001127
1128 if (!use_table(head))
1129 continue;
1130
1131 spin_unlock(&sysctl_lock);
1132
1133 error = parse_table(name, nlen, oldval, oldlenp,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001134 newval, newlen, head->ctl_table,
1135 &context);
Jesper Juhl5a6b4542005-06-25 14:58:48 -07001136 kfree(context);
Al Viro330d57f2005-11-04 10:18:40 +00001137
1138 spin_lock(&sysctl_lock);
1139 unuse_table(head);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001140 if (error != -ENOTDIR)
Al Viro330d57f2005-11-04 10:18:40 +00001141 break;
1142 } while ((tmp = tmp->next) != &root_table_header.ctl_entry);
1143 spin_unlock(&sysctl_lock);
1144 return error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001145}
1146
1147asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
1148{
1149 struct __sysctl_args tmp;
1150 int error;
1151
1152 if (copy_from_user(&tmp, args, sizeof(tmp)))
1153 return -EFAULT;
1154
1155 lock_kernel();
1156 error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1157 tmp.newval, tmp.newlen);
1158 unlock_kernel();
1159 return error;
1160}
1161
1162/*
1163 * ctl_perm does NOT grant the superuser all rights automatically, because
1164 * some sysctl variables are readonly even to root.
1165 */
1166
1167static int test_perm(int mode, int op)
1168{
1169 if (!current->euid)
1170 mode >>= 6;
1171 else if (in_egroup_p(0))
1172 mode >>= 3;
1173 if ((mode & op & 0007) == op)
1174 return 0;
1175 return -EACCES;
1176}
1177
1178static inline int ctl_perm(ctl_table *table, int op)
1179{
1180 int error;
1181 error = security_sysctl(table, op);
1182 if (error)
1183 return error;
1184 return test_perm(table->mode, op);
1185}
1186
1187static int parse_table(int __user *name, int nlen,
1188 void __user *oldval, size_t __user *oldlenp,
1189 void __user *newval, size_t newlen,
1190 ctl_table *table, void **context)
1191{
1192 int n;
1193repeat:
1194 if (!nlen)
1195 return -ENOTDIR;
1196 if (get_user(n, name))
1197 return -EFAULT;
1198 for ( ; table->ctl_name; table++) {
1199 if (n == table->ctl_name || table->ctl_name == CTL_ANY) {
1200 int error;
1201 if (table->child) {
1202 if (ctl_perm(table, 001))
1203 return -EPERM;
1204 if (table->strategy) {
1205 error = table->strategy(
1206 table, name, nlen,
1207 oldval, oldlenp,
1208 newval, newlen, context);
1209 if (error)
1210 return error;
1211 }
1212 name++;
1213 nlen--;
1214 table = table->child;
1215 goto repeat;
1216 }
1217 error = do_sysctl_strategy(table, name, nlen,
1218 oldval, oldlenp,
1219 newval, newlen, context);
1220 return error;
1221 }
1222 }
1223 return -ENOTDIR;
1224}
1225
1226/* Perform the actual read/write of a sysctl table entry. */
1227int do_sysctl_strategy (ctl_table *table,
1228 int __user *name, int nlen,
1229 void __user *oldval, size_t __user *oldlenp,
1230 void __user *newval, size_t newlen, void **context)
1231{
1232 int op = 0, rc;
1233 size_t len;
1234
1235 if (oldval)
1236 op |= 004;
1237 if (newval)
1238 op |= 002;
1239 if (ctl_perm(table, op))
1240 return -EPERM;
1241
1242 if (table->strategy) {
1243 rc = table->strategy(table, name, nlen, oldval, oldlenp,
1244 newval, newlen, context);
1245 if (rc < 0)
1246 return rc;
1247 if (rc > 0)
1248 return 0;
1249 }
1250
1251 /* If there is no strategy routine, or if the strategy returns
1252 * zero, proceed with automatic r/w */
1253 if (table->data && table->maxlen) {
1254 if (oldval && oldlenp) {
1255 if (get_user(len, oldlenp))
1256 return -EFAULT;
1257 if (len) {
1258 if (len > table->maxlen)
1259 len = table->maxlen;
1260 if(copy_to_user(oldval, table->data, len))
1261 return -EFAULT;
1262 if(put_user(len, oldlenp))
1263 return -EFAULT;
1264 }
1265 }
1266 if (newval && newlen) {
1267 len = newlen;
1268 if (len > table->maxlen)
1269 len = table->maxlen;
1270 if(copy_from_user(table->data, newval, len))
1271 return -EFAULT;
1272 }
1273 }
1274 return 0;
1275}
1276
1277/**
1278 * register_sysctl_table - register a sysctl hierarchy
1279 * @table: the top-level table structure
1280 * @insert_at_head: whether the entry should be inserted in front or at the end
1281 *
1282 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1283 * array. An entry with a ctl_name of 0 terminates the table.
1284 *
1285 * The members of the &ctl_table structure are used as follows:
1286 *
1287 * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1288 * must be unique within that level of sysctl
1289 *
1290 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1291 * enter a sysctl file
1292 *
1293 * data - a pointer to data for use by proc_handler
1294 *
1295 * maxlen - the maximum size in bytes of the data
1296 *
1297 * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1298 *
1299 * child - a pointer to the child sysctl table if this entry is a directory, or
1300 * %NULL.
1301 *
1302 * proc_handler - the text handler routine (described below)
1303 *
1304 * strategy - the strategy routine (described below)
1305 *
1306 * de - for internal use by the sysctl routines
1307 *
1308 * extra1, extra2 - extra pointers usable by the proc handler routines
1309 *
1310 * Leaf nodes in the sysctl tree will be represented by a single file
1311 * under /proc; non-leaf nodes will be represented by directories.
1312 *
1313 * sysctl(2) can automatically manage read and write requests through
1314 * the sysctl table. The data and maxlen fields of the ctl_table
1315 * struct enable minimal validation of the values being written to be
1316 * performed, and the mode field allows minimal authentication.
1317 *
1318 * More sophisticated management can be enabled by the provision of a
1319 * strategy routine with the table entry. This will be called before
1320 * any automatic read or write of the data is performed.
1321 *
1322 * The strategy routine may return
1323 *
1324 * < 0 - Error occurred (error is passed to user process)
1325 *
1326 * 0 - OK - proceed with automatic read or write.
1327 *
1328 * > 0 - OK - read or write has been done by the strategy routine, so
1329 * return immediately.
1330 *
1331 * There must be a proc_handler routine for any terminal nodes
1332 * mirrored under /proc/sys (non-terminals are handled by a built-in
1333 * directory handler). Several default handlers are available to
1334 * cover common cases -
1335 *
1336 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1337 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1338 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1339 *
1340 * It is the handler's job to read the input buffer from user memory
1341 * and process it. The handler should return 0 on success.
1342 *
1343 * This routine returns %NULL on a failure to register, and a pointer
1344 * to the table header on success.
1345 */
1346struct ctl_table_header *register_sysctl_table(ctl_table * table,
1347 int insert_at_head)
1348{
1349 struct ctl_table_header *tmp;
1350 tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
1351 if (!tmp)
1352 return NULL;
1353 tmp->ctl_table = table;
1354 INIT_LIST_HEAD(&tmp->ctl_entry);
Al Viro330d57f2005-11-04 10:18:40 +00001355 tmp->used = 0;
1356 tmp->unregistering = NULL;
1357 spin_lock(&sysctl_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001358 if (insert_at_head)
1359 list_add(&tmp->ctl_entry, &root_table_header.ctl_entry);
1360 else
1361 list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
Al Viro330d57f2005-11-04 10:18:40 +00001362 spin_unlock(&sysctl_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001363#ifdef CONFIG_PROC_FS
Al Viro330d57f2005-11-04 10:18:40 +00001364 register_proc_table(table, proc_sys_root, tmp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001365#endif
1366 return tmp;
1367}
1368
1369/**
1370 * unregister_sysctl_table - unregister a sysctl table hierarchy
1371 * @header: the header returned from register_sysctl_table
1372 *
1373 * Unregisters the sysctl table and all children. proc entries may not
1374 * actually be removed until they are no longer used by anyone.
1375 */
1376void unregister_sysctl_table(struct ctl_table_header * header)
1377{
Al Viro330d57f2005-11-04 10:18:40 +00001378 might_sleep();
1379 spin_lock(&sysctl_lock);
1380 start_unregistering(header);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001381#ifdef CONFIG_PROC_FS
1382 unregister_proc_table(header->ctl_table, proc_sys_root);
1383#endif
Al Viro330d57f2005-11-04 10:18:40 +00001384 spin_unlock(&sysctl_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001385 kfree(header);
1386}
1387
1388/*
1389 * /proc/sys support
1390 */
1391
1392#ifdef CONFIG_PROC_FS
1393
1394/* Scan the sysctl entries in table and add them all into /proc */
Al Viro330d57f2005-11-04 10:18:40 +00001395static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001396{
1397 struct proc_dir_entry *de;
1398 int len;
1399 mode_t mode;
1400
1401 for (; table->ctl_name; table++) {
1402 /* Can't do anything without a proc name. */
1403 if (!table->procname)
1404 continue;
1405 /* Maybe we can't do anything with it... */
1406 if (!table->proc_handler && !table->child) {
1407 printk(KERN_WARNING "SYSCTL: Can't register %s\n",
1408 table->procname);
1409 continue;
1410 }
1411
1412 len = strlen(table->procname);
1413 mode = table->mode;
1414
1415 de = NULL;
1416 if (table->proc_handler)
1417 mode |= S_IFREG;
1418 else {
1419 mode |= S_IFDIR;
1420 for (de = root->subdir; de; de = de->next) {
1421 if (proc_match(len, table->procname, de))
1422 break;
1423 }
1424 /* If the subdir exists already, de is non-NULL */
1425 }
1426
1427 if (!de) {
1428 de = create_proc_entry(table->procname, mode, root);
1429 if (!de)
1430 continue;
Al Viro330d57f2005-11-04 10:18:40 +00001431 de->set = set;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001432 de->data = (void *) table;
1433 if (table->proc_handler)
1434 de->proc_fops = &proc_sys_file_operations;
1435 }
1436 table->de = de;
1437 if (de->mode & S_IFDIR)
Al Viro330d57f2005-11-04 10:18:40 +00001438 register_proc_table(table->child, de, set);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439 }
1440}
1441
1442/*
1443 * Unregister a /proc sysctl table and any subdirectories.
1444 */
1445static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root)
1446{
1447 struct proc_dir_entry *de;
1448 for (; table->ctl_name; table++) {
1449 if (!(de = table->de))
1450 continue;
1451 if (de->mode & S_IFDIR) {
1452 if (!table->child) {
1453 printk (KERN_ALERT "Help - malformed sysctl tree on free\n");
1454 continue;
1455 }
1456 unregister_proc_table(table->child, de);
1457
1458 /* Don't unregister directories which still have entries.. */
1459 if (de->subdir)
1460 continue;
1461 }
1462
Al Viro330d57f2005-11-04 10:18:40 +00001463 /*
1464 * In any case, mark the entry as goner; we'll keep it
1465 * around if it's busy, but we'll know to do nothing with
1466 * its fields. We are under sysctl_lock here.
1467 */
1468 de->data = NULL;
1469
Linus Torvalds1da177e2005-04-16 15:20:36 -07001470 /* Don't unregister proc entries that are still being used.. */
1471 if (atomic_read(&de->count))
1472 continue;
1473
1474 table->de = NULL;
1475 remove_proc_entry(table->procname, root);
1476 }
1477}
1478
1479static ssize_t do_rw_proc(int write, struct file * file, char __user * buf,
1480 size_t count, loff_t *ppos)
1481{
1482 int op;
Al Viro330d57f2005-11-04 10:18:40 +00001483 struct proc_dir_entry *de = PDE(file->f_dentry->d_inode);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001484 struct ctl_table *table;
1485 size_t res;
Al Viro330d57f2005-11-04 10:18:40 +00001486 ssize_t error = -ENOTDIR;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001487
Al Viro330d57f2005-11-04 10:18:40 +00001488 spin_lock(&sysctl_lock);
1489 if (de && de->data && use_table(de->set)) {
1490 /*
1491 * at that point we know that sysctl was not unregistered
1492 * and won't be until we finish
1493 */
1494 spin_unlock(&sysctl_lock);
1495 table = (struct ctl_table *) de->data;
1496 if (!table || !table->proc_handler)
1497 goto out;
1498 error = -EPERM;
1499 op = (write ? 002 : 004);
1500 if (ctl_perm(table, op))
1501 goto out;
1502
1503 /* careful: calling conventions are nasty here */
1504 res = count;
1505 error = (*table->proc_handler)(table, write, file,
1506 buf, &res, ppos);
1507 if (!error)
1508 error = res;
1509 out:
1510 spin_lock(&sysctl_lock);
1511 unuse_table(de->set);
1512 }
1513 spin_unlock(&sysctl_lock);
1514 return error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001515}
1516
1517static int proc_opensys(struct inode *inode, struct file *file)
1518{
1519 if (file->f_mode & FMODE_WRITE) {
1520 /*
1521 * sysctl entries that are not writable,
1522 * are _NOT_ writable, capabilities or not.
1523 */
1524 if (!(inode->i_mode & S_IWUSR))
1525 return -EPERM;
1526 }
1527
1528 return 0;
1529}
1530
1531static ssize_t proc_readsys(struct file * file, char __user * buf,
1532 size_t count, loff_t *ppos)
1533{
1534 return do_rw_proc(0, file, buf, count, ppos);
1535}
1536
1537static ssize_t proc_writesys(struct file * file, const char __user * buf,
1538 size_t count, loff_t *ppos)
1539{
1540 return do_rw_proc(1, file, (char __user *) buf, count, ppos);
1541}
1542
1543/**
1544 * proc_dostring - read a string sysctl
1545 * @table: the sysctl table
1546 * @write: %TRUE if this is a write to the sysctl file
1547 * @filp: the file structure
1548 * @buffer: the user buffer
1549 * @lenp: the size of the user buffer
1550 * @ppos: file position
1551 *
1552 * Reads/writes a string from/to the user buffer. If the kernel
1553 * buffer provided is not large enough to hold the string, the
1554 * string is truncated. The copied string is %NULL-terminated.
1555 * If the string is being read by the user process, it is copied
1556 * and a newline '\n' is added. It is truncated if the buffer is
1557 * not large enough.
1558 *
1559 * Returns 0 on success.
1560 */
1561int proc_dostring(ctl_table *table, int write, struct file *filp,
1562 void __user *buffer, size_t *lenp, loff_t *ppos)
1563{
1564 size_t len;
1565 char __user *p;
1566 char c;
1567
1568 if (!table->data || !table->maxlen || !*lenp ||
1569 (*ppos && !write)) {
1570 *lenp = 0;
1571 return 0;
1572 }
1573
1574 if (write) {
1575 len = 0;
1576 p = buffer;
1577 while (len < *lenp) {
1578 if (get_user(c, p++))
1579 return -EFAULT;
1580 if (c == 0 || c == '\n')
1581 break;
1582 len++;
1583 }
1584 if (len >= table->maxlen)
1585 len = table->maxlen-1;
1586 if(copy_from_user(table->data, buffer, len))
1587 return -EFAULT;
1588 ((char *) table->data)[len] = 0;
1589 *ppos += *lenp;
1590 } else {
1591 len = strlen(table->data);
1592 if (len > table->maxlen)
1593 len = table->maxlen;
1594 if (len > *lenp)
1595 len = *lenp;
1596 if (len)
1597 if(copy_to_user(buffer, table->data, len))
1598 return -EFAULT;
1599 if (len < *lenp) {
1600 if(put_user('\n', ((char __user *) buffer) + len))
1601 return -EFAULT;
1602 len++;
1603 }
1604 *lenp = len;
1605 *ppos += len;
1606 }
1607 return 0;
1608}
1609
1610/*
1611 * Special case of dostring for the UTS structure. This has locks
1612 * to observe. Should this be in kernel/sys.c ????
1613 */
1614
1615static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
1616 void __user *buffer, size_t *lenp, loff_t *ppos)
1617{
1618 int r;
1619
1620 if (!write) {
1621 down_read(&uts_sem);
1622 r=proc_dostring(table,0,filp,buffer,lenp, ppos);
1623 up_read(&uts_sem);
1624 } else {
1625 down_write(&uts_sem);
1626 r=proc_dostring(table,1,filp,buffer,lenp, ppos);
1627 up_write(&uts_sem);
1628 }
1629 return r;
1630}
1631
/*
 * Default conversion between the (sign, magnitude) pair used by
 * do_proc_dointvec() and a stored int.
 *
 * @negp:  sign flag; read on write, set to -1 (negative) or 0 on read
 * @lvalp: magnitude; read on write, set on read
 * @valp:  the stored integer
 * @write: non-zero to store *negp / *lvalp into *valp, zero for the reverse
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		*valp = *negp ? -*lvalp : *lvalp;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows
			 * for INT_MIN and would produce a bogus magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1650
1651static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
1652 void __user *buffer, size_t *lenp, loff_t *ppos,
1653 int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1654 int write, void *data),
1655 void *data)
1656{
1657#define TMPBUFLEN 21
1658 int *i, vleft, first=1, neg, val;
1659 unsigned long lval;
1660 size_t left, len;
1661
1662 char buf[TMPBUFLEN], *p;
1663 char __user *s = buffer;
1664
1665 if (!table->data || !table->maxlen || !*lenp ||
1666 (*ppos && !write)) {
1667 *lenp = 0;
1668 return 0;
1669 }
1670
1671 i = (int *) table->data;
1672 vleft = table->maxlen / sizeof(*i);
1673 left = *lenp;
1674
1675 if (!conv)
1676 conv = do_proc_dointvec_conv;
1677
1678 for (; left && vleft--; i++, first=0) {
1679 if (write) {
1680 while (left) {
1681 char c;
1682 if (get_user(c, s))
1683 return -EFAULT;
1684 if (!isspace(c))
1685 break;
1686 left--;
1687 s++;
1688 }
1689 if (!left)
1690 break;
1691 neg = 0;
1692 len = left;
1693 if (len > sizeof(buf) - 1)
1694 len = sizeof(buf) - 1;
1695 if (copy_from_user(buf, s, len))
1696 return -EFAULT;
1697 buf[len] = 0;
1698 p = buf;
1699 if (*p == '-' && left > 1) {
1700 neg = 1;
1701 left--, p++;
1702 }
1703 if (*p < '0' || *p > '9')
1704 break;
1705
1706 lval = simple_strtoul(p, &p, 0);
1707
1708 len = p-buf;
1709 if ((len < left) && *p && !isspace(*p))
1710 break;
1711 if (neg)
1712 val = -val;
1713 s += len;
1714 left -= len;
1715
1716 if (conv(&neg, &lval, i, 1, data))
1717 break;
1718 } else {
1719 p = buf;
1720 if (!first)
1721 *p++ = '\t';
1722
1723 if (conv(&neg, &lval, i, 0, data))
1724 break;
1725
1726 sprintf(p, "%s%lu", neg ? "-" : "", lval);
1727 len = strlen(buf);
1728 if (len > left)
1729 len = left;
1730 if(copy_to_user(s, buf, len))
1731 return -EFAULT;
1732 left -= len;
1733 s += len;
1734 }
1735 }
1736
1737 if (!write && !first && left) {
1738 if(put_user('\n', s))
1739 return -EFAULT;
1740 left--, s++;
1741 }
1742 if (write) {
1743 while (left) {
1744 char c;
1745 if (get_user(c, s++))
1746 return -EFAULT;
1747 if (!isspace(c))
1748 break;
1749 left--;
1750 }
1751 }
1752 if (write && first)
1753 return -EINVAL;
1754 *lenp -= left;
1755 *ppos += *lenp;
1756 return 0;
1757#undef TMPBUFLEN
1758}
1759
1760/**
1761 * proc_dointvec - read a vector of integers
1762 * @table: the sysctl table
1763 * @write: %TRUE if this is a write to the sysctl file
1764 * @filp: the file structure
1765 * @buffer: the user buffer
1766 * @lenp: the size of the user buffer
1767 * @ppos: file position
1768 *
1769 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1770 * values from/to the user buffer, treated as an ASCII string.
1771 *
1772 * Returns 0 on success.
1773 */
1774int proc_dointvec(ctl_table *table, int write, struct file *filp,
1775 void __user *buffer, size_t *lenp, loff_t *ppos)
1776{
1777 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1778 NULL,NULL);
1779}
1780
/* How a written value is combined with the stored one. */
#define OP_SET	0
#define OP_AND	1
#define OP_OR	2
#define OP_MAX	3
#define OP_MIN	4

/*
 * Conversion callback for proc_dointvec_bset().
 *
 * @negp:  sign flag; read on write, set to -1 (negative) or 0 on read
 * @lvalp: magnitude; read on write, set on read
 * @valp:  the stored integer
 * @write: non-zero to update *valp, zero to report it
 * @data:  points to an int holding one of the OP_* codes above
 *
 * On write the new value is assigned (OP_SET), ANDed or ORed in, or used
 * as a lower (OP_MAX) / upper (OP_MIN) clamp for the stored value.
 * Always returns 0.
 */
static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
				      int *valp,
				      int write, void *data)
{
	int op = *(int *)data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		switch(op) {
		case OP_SET:	*valp = val; break;
		case OP_AND:	*valp &= val; break;
		case OP_OR:	*valp |= val; break;
		case OP_MAX:	if(*valp < val)
					*valp = val;
				break;
		case OP_MIN:	if(*valp > val)
				*valp = val;
				break;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows
			 * for INT_MIN and would produce a bogus magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1817
1818/*
1819 * init may raise the set.
1820 */
1821
1822int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
1823 void __user *buffer, size_t *lenp, loff_t *ppos)
1824{
1825 int op;
1826
1827 if (!capable(CAP_SYS_MODULE)) {
1828 return -EPERM;
1829 }
1830
1831 op = (current->pid == 1) ? OP_SET : OP_AND;
1832 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1833 do_proc_dointvec_bset_conv,&op);
1834}
1835
/* Optional bounds for do_proc_dointvec_minmax_conv(); %NULL means no bound. */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Conversion callback that range-checks written values.
 *
 * @negp:  sign flag; read on write, set to -1 (negative) or 0 on read
 * @lvalp: magnitude; read on write, set on read
 * @valp:  the stored integer
 * @write: non-zero to store the value, zero to report it
 * @data:  a struct do_proc_dointvec_minmax_conv_param
 *
 * Returns -EINVAL, leaving *valp untouched, when a written value lies
 * below *min or above *max; returns 0 otherwise.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows
			 * for INT_MIN and would produce a bogus magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1864
1865/**
1866 * proc_dointvec_minmax - read a vector of integers with min/max values
1867 * @table: the sysctl table
1868 * @write: %TRUE if this is a write to the sysctl file
1869 * @filp: the file structure
1870 * @buffer: the user buffer
1871 * @lenp: the size of the user buffer
1872 * @ppos: file position
1873 *
1874 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1875 * values from/to the user buffer, treated as an ASCII string.
1876 *
1877 * This routine will ensure the values are within the range specified by
1878 * table->extra1 (min) and table->extra2 (max).
1879 *
1880 * Returns 0 on success.
1881 */
1882int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
1883 void __user *buffer, size_t *lenp, loff_t *ppos)
1884{
1885 struct do_proc_dointvec_minmax_conv_param param = {
1886 .min = (int *) table->extra1,
1887 .max = (int *) table->extra2,
1888 };
1889 return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
1890 do_proc_dointvec_minmax_conv, &param);
1891}
1892
1893static int do_proc_doulongvec_minmax(ctl_table *table, int write,
1894 struct file *filp,
1895 void __user *buffer,
1896 size_t *lenp, loff_t *ppos,
1897 unsigned long convmul,
1898 unsigned long convdiv)
1899{
1900#define TMPBUFLEN 21
1901 unsigned long *i, *min, *max, val;
1902 int vleft, first=1, neg;
1903 size_t len, left;
1904 char buf[TMPBUFLEN], *p;
1905 char __user *s = buffer;
1906
1907 if (!table->data || !table->maxlen || !*lenp ||
1908 (*ppos && !write)) {
1909 *lenp = 0;
1910 return 0;
1911 }
1912
1913 i = (unsigned long *) table->data;
1914 min = (unsigned long *) table->extra1;
1915 max = (unsigned long *) table->extra2;
1916 vleft = table->maxlen / sizeof(unsigned long);
1917 left = *lenp;
1918
1919 for (; left && vleft--; i++, min++, max++, first=0) {
1920 if (write) {
1921 while (left) {
1922 char c;
1923 if (get_user(c, s))
1924 return -EFAULT;
1925 if (!isspace(c))
1926 break;
1927 left--;
1928 s++;
1929 }
1930 if (!left)
1931 break;
1932 neg = 0;
1933 len = left;
1934 if (len > TMPBUFLEN-1)
1935 len = TMPBUFLEN-1;
1936 if (copy_from_user(buf, s, len))
1937 return -EFAULT;
1938 buf[len] = 0;
1939 p = buf;
1940 if (*p == '-' && left > 1) {
1941 neg = 1;
1942 left--, p++;
1943 }
1944 if (*p < '0' || *p > '9')
1945 break;
1946 val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
1947 len = p-buf;
1948 if ((len < left) && *p && !isspace(*p))
1949 break;
1950 if (neg)
1951 val = -val;
1952 s += len;
1953 left -= len;
1954
1955 if(neg)
1956 continue;
1957 if ((min && val < *min) || (max && val > *max))
1958 continue;
1959 *i = val;
1960 } else {
1961 p = buf;
1962 if (!first)
1963 *p++ = '\t';
1964 sprintf(p, "%lu", convdiv * (*i) / convmul);
1965 len = strlen(buf);
1966 if (len > left)
1967 len = left;
1968 if(copy_to_user(s, buf, len))
1969 return -EFAULT;
1970 left -= len;
1971 s += len;
1972 }
1973 }
1974
1975 if (!write && !first && left) {
1976 if(put_user('\n', s))
1977 return -EFAULT;
1978 left--, s++;
1979 }
1980 if (write) {
1981 while (left) {
1982 char c;
1983 if (get_user(c, s++))
1984 return -EFAULT;
1985 if (!isspace(c))
1986 break;
1987 left--;
1988 }
1989 }
1990 if (write && first)
1991 return -EINVAL;
1992 *lenp -= left;
1993 *ppos += *lenp;
1994 return 0;
1995#undef TMPBUFLEN
1996}
1997
1998/**
1999 * proc_doulongvec_minmax - read a vector of long integers with min/max values
2000 * @table: the sysctl table
2001 * @write: %TRUE if this is a write to the sysctl file
2002 * @filp: the file structure
2003 * @buffer: the user buffer
2004 * @lenp: the size of the user buffer
2005 * @ppos: file position
2006 *
2007 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2008 * values from/to the user buffer, treated as an ASCII string.
2009 *
2010 * This routine will ensure the values are within the range specified by
2011 * table->extra1 (min) and table->extra2 (max).
2012 *
2013 * Returns 0 on success.
2014 */
2015int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2016 void __user *buffer, size_t *lenp, loff_t *ppos)
2017{
2018 return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
2019}
2020
2021/**
2022 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2023 * @table: the sysctl table
2024 * @write: %TRUE if this is a write to the sysctl file
2025 * @filp: the file structure
2026 * @buffer: the user buffer
2027 * @lenp: the size of the user buffer
2028 * @ppos: file position
2029 *
2030 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2031 * values from/to the user buffer, treated as an ASCII string. The values
2032 * are treated as milliseconds, and converted to jiffies when they are stored.
2033 *
2034 * This routine will ensure the values are within the range specified by
2035 * table->extra1 (min) and table->extra2 (max).
2036 *
2037 * Returns 0 on success.
2038 */
2039int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2040 struct file *filp,
2041 void __user *buffer,
2042 size_t *lenp, loff_t *ppos)
2043{
2044 return do_proc_doulongvec_minmax(table, write, filp, buffer,
2045 lenp, ppos, HZ, 1000l);
2046}
2047
2048
2049static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2050 int *valp,
2051 int write, void *data)
2052{
2053 if (write) {
2054 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2055 } else {
2056 int val = *valp;
2057 unsigned long lval;
2058 if (val < 0) {
2059 *negp = -1;
2060 lval = (unsigned long)-val;
2061 } else {
2062 *negp = 0;
2063 lval = (unsigned long)val;
2064 }
2065 *lvalp = lval / HZ;
2066 }
2067 return 0;
2068}
2069
/*
 * Conversion callback for proc_dointvec_userhz_jiffies(): written values
 * go through clock_t_to_jiffies() before being stored, and stored values
 * go through jiffies_to_clock_t() before being reported.
 *
 * @negp:  sign flag; read on write, set to -1 (negative) or 0 on read
 * @lvalp: magnitude as seen by user space; read on write, set on read
 * @valp:  the stored value
 * @write: non-zero to store, zero to report
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
						int *valp,
						int write, void *data)
{
	if (write) {
		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows
			 * for INT_MIN and would produce a bogus magnitude.
			 */
			lval = -(unsigned long)val;
		} else {
			*negp = 0;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_clock_t(lval);
	}
	return 0;
}
2090
/*
 * Conversion callback for proc_dointvec_ms_jiffies(): written values go
 * through msecs_to_jiffies() before being stored, and stored values go
 * through jiffies_to_msecs() before being reported.
 *
 * @negp:  sign flag; read on write, set to -1 (negative) or 0 on read
 * @lvalp: magnitude as seen by user space; read on write, set on read
 * @valp:  the stored value
 * @write: non-zero to store, zero to report
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows
			 * for INT_MIN and would produce a bogus magnitude.
			 */
			lval = -(unsigned long)val;
		} else {
			*negp = 0;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2111
2112/**
2113 * proc_dointvec_jiffies - read a vector of integers as seconds
2114 * @table: the sysctl table
2115 * @write: %TRUE if this is a write to the sysctl file
2116 * @filp: the file structure
2117 * @buffer: the user buffer
2118 * @lenp: the size of the user buffer
2119 * @ppos: file position
2120 *
2121 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2122 * values from/to the user buffer, treated as an ASCII string.
2123 * The values read are assumed to be in seconds, and are converted into
2124 * jiffies.
2125 *
2126 * Returns 0 on success.
2127 */
2128int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2129 void __user *buffer, size_t *lenp, loff_t *ppos)
2130{
2131 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2132 do_proc_dointvec_jiffies_conv,NULL);
2133}
2134
2135/**
2136 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2137 * @table: the sysctl table
2138 * @write: %TRUE if this is a write to the sysctl file
2139 * @filp: the file structure
2140 * @buffer: the user buffer
2141 * @lenp: the size of the user buffer
Randy Dunlap1e5d5332005-11-07 01:01:06 -08002142 * @ppos: pointer to the file position
Linus Torvalds1da177e2005-04-16 15:20:36 -07002143 *
2144 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2145 * values from/to the user buffer, treated as an ASCII string.
2146 * The values read are assumed to be in 1/USER_HZ seconds, and
2147 * are converted into jiffies.
2148 *
2149 * Returns 0 on success.
2150 */
2151int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2152 void __user *buffer, size_t *lenp, loff_t *ppos)
2153{
2154 return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2155 do_proc_dointvec_userhz_jiffies_conv,NULL);
2156}
2157
2158/**
2159 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2160 * @table: the sysctl table
2161 * @write: %TRUE if this is a write to the sysctl file
2162 * @filp: the file structure
2163 * @buffer: the user buffer
2164 * @lenp: the size of the user buffer
Martin Waitz67be2dd2005-05-01 08:59:26 -07002165 * @ppos: file position
2166 * @ppos: the current position in the file
Linus Torvalds1da177e2005-04-16 15:20:36 -07002167 *
2168 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2169 * values from/to the user buffer, treated as an ASCII string.
2170 * The values read are assumed to be in 1/1000 seconds, and
2171 * are converted into jiffies.
2172 *
2173 * Returns 0 on success.
2174 */
2175int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2176 void __user *buffer, size_t *lenp, loff_t *ppos)
2177{
2178 return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2179 do_proc_dointvec_ms_jiffies_conv, NULL);
2180}
2181
2182#else /* CONFIG_PROC_FS */
2183
2184int proc_dostring(ctl_table *table, int write, struct file *filp,
2185 void __user *buffer, size_t *lenp, loff_t *ppos)
2186{
2187 return -ENOSYS;
2188}
2189
2190static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
2191 void __user *buffer, size_t *lenp, loff_t *ppos)
2192{
2193 return -ENOSYS;
2194}
2195
2196int proc_dointvec(ctl_table *table, int write, struct file *filp,
2197 void __user *buffer, size_t *lenp, loff_t *ppos)
2198{
2199 return -ENOSYS;
2200}
2201
2202int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
2203 void __user *buffer, size_t *lenp, loff_t *ppos)
2204{
2205 return -ENOSYS;
2206}
2207
2208int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
2209 void __user *buffer, size_t *lenp, loff_t *ppos)
2210{
2211 return -ENOSYS;
2212}
2213
2214int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2215 void __user *buffer, size_t *lenp, loff_t *ppos)
2216{
2217 return -ENOSYS;
2218}
2219
2220int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2221 void __user *buffer, size_t *lenp, loff_t *ppos)
2222{
2223 return -ENOSYS;
2224}
2225
2226int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2227 void __user *buffer, size_t *lenp, loff_t *ppos)
2228{
2229 return -ENOSYS;
2230}
2231
2232int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2233 void __user *buffer, size_t *lenp, loff_t *ppos)
2234{
2235 return -ENOSYS;
2236}
2237
2238int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2239 struct file *filp,
2240 void __user *buffer,
2241 size_t *lenp, loff_t *ppos)
2242{
2243 return -ENOSYS;
2244}
2245
2246
2247#endif /* CONFIG_PROC_FS */
2248
2249
2250/*
2251 * General sysctl support routines
2252 */
2253
2254/* The generic string strategy routine: */
2255int sysctl_string(ctl_table *table, int __user *name, int nlen,
2256 void __user *oldval, size_t __user *oldlenp,
2257 void __user *newval, size_t newlen, void **context)
2258{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002259 if (!table->data || !table->maxlen)
2260 return -ENOTDIR;
2261
2262 if (oldval && oldlenp) {
Linus Torvaldsde9e0072005-12-31 17:00:29 -08002263 size_t bufsize;
2264 if (get_user(bufsize, oldlenp))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002265 return -EFAULT;
Linus Torvaldsde9e0072005-12-31 17:00:29 -08002266 if (bufsize) {
2267 size_t len = strlen(table->data), copied;
2268
2269 /* This shouldn't trigger for a well-formed sysctl */
2270 if (len > table->maxlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002271 len = table->maxlen;
Linus Torvaldsde9e0072005-12-31 17:00:29 -08002272
2273 /* Copy up to a max of bufsize-1 bytes of the string */
2274 copied = (len >= bufsize) ? bufsize - 1 : len;
2275
2276 if (copy_to_user(oldval, table->data, copied) ||
2277 put_user(0, (char __user *)(oldval + copied)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002278 return -EFAULT;
Linus Torvaldsde9e0072005-12-31 17:00:29 -08002279 if (put_user(len, oldlenp))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002280 return -EFAULT;
2281 }
2282 }
2283 if (newval && newlen) {
Linus Torvaldsde9e0072005-12-31 17:00:29 -08002284 size_t len = newlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002285 if (len > table->maxlen)
2286 len = table->maxlen;
2287 if(copy_from_user(table->data, newval, len))
2288 return -EFAULT;
2289 if (len == table->maxlen)
2290 len--;
2291 ((char *) table->data)[len] = 0;
2292 }
Yi Yang82c9df82005-12-30 16:37:10 +08002293 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002294}
2295
2296/*
2297 * This function makes sure that all of the integers in the vector
2298 * are between the minimum and maximum values given in the arrays
2299 * table->extra1 and table->extra2, respectively.
2300 */
2301int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2302 void __user *oldval, size_t __user *oldlenp,
2303 void __user *newval, size_t newlen, void **context)
2304{
2305
2306 if (newval && newlen) {
2307 int __user *vec = (int __user *) newval;
2308 int *min = (int *) table->extra1;
2309 int *max = (int *) table->extra2;
2310 size_t length;
2311 int i;
2312
2313 if (newlen % sizeof(int) != 0)
2314 return -EINVAL;
2315
2316 if (!table->extra1 && !table->extra2)
2317 return 0;
2318
2319 if (newlen > table->maxlen)
2320 newlen = table->maxlen;
2321 length = newlen / sizeof(int);
2322
2323 for (i = 0; i < length; i++) {
2324 int value;
2325 if (get_user(value, vec + i))
2326 return -EFAULT;
2327 if (min && value < min[i])
2328 return -EINVAL;
2329 if (max && value > max[i])
2330 return -EINVAL;
2331 }
2332 }
2333 return 0;
2334}
2335
2336/* Strategy function to convert jiffies to seconds */
2337int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2338 void __user *oldval, size_t __user *oldlenp,
2339 void __user *newval, size_t newlen, void **context)
2340{
2341 if (oldval) {
2342 size_t olen;
2343 if (oldlenp) {
2344 if (get_user(olen, oldlenp))
2345 return -EFAULT;
2346 if (olen!=sizeof(int))
2347 return -EINVAL;
2348 }
2349 if (put_user(*(int *)(table->data)/HZ, (int __user *)oldval) ||
2350 (oldlenp && put_user(sizeof(int),oldlenp)))
2351 return -EFAULT;
2352 }
2353 if (newval && newlen) {
2354 int new;
2355 if (newlen != sizeof(int))
2356 return -EINVAL;
2357 if (get_user(new, (int __user *)newval))
2358 return -EFAULT;
2359 *(int *)(table->data) = new*HZ;
2360 }
2361 return 1;
2362}
2363
2364/* Strategy function to convert jiffies to seconds */
2365int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2366 void __user *oldval, size_t __user *oldlenp,
2367 void __user *newval, size_t newlen, void **context)
2368{
2369 if (oldval) {
2370 size_t olen;
2371 if (oldlenp) {
2372 if (get_user(olen, oldlenp))
2373 return -EFAULT;
2374 if (olen!=sizeof(int))
2375 return -EINVAL;
2376 }
2377 if (put_user(jiffies_to_msecs(*(int *)(table->data)), (int __user *)oldval) ||
2378 (oldlenp && put_user(sizeof(int),oldlenp)))
2379 return -EFAULT;
2380 }
2381 if (newval && newlen) {
2382 int new;
2383 if (newlen != sizeof(int))
2384 return -EINVAL;
2385 if (get_user(new, (int __user *)newval))
2386 return -EFAULT;
2387 *(int *)(table->data) = msecs_to_jiffies(new);
2388 }
2389 return 1;
2390}
2391
2392#else /* CONFIG_SYSCTL */
2393
2394
2395asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
2396{
2397 return -ENOSYS;
2398}
2399
2400int sysctl_string(ctl_table *table, int __user *name, int nlen,
2401 void __user *oldval, size_t __user *oldlenp,
2402 void __user *newval, size_t newlen, void **context)
2403{
2404 return -ENOSYS;
2405}
2406
2407int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2408 void __user *oldval, size_t __user *oldlenp,
2409 void __user *newval, size_t newlen, void **context)
2410{
2411 return -ENOSYS;
2412}
2413
2414int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2415 void __user *oldval, size_t __user *oldlenp,
2416 void __user *newval, size_t newlen, void **context)
2417{
2418 return -ENOSYS;
2419}
2420
2421int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2422 void __user *oldval, size_t __user *oldlenp,
2423 void __user *newval, size_t newlen, void **context)
2424{
2425 return -ENOSYS;
2426}
2427
2428int proc_dostring(ctl_table *table, int write, struct file *filp,
2429 void __user *buffer, size_t *lenp, loff_t *ppos)
2430{
2431 return -ENOSYS;
2432}
2433
2434int proc_dointvec(ctl_table *table, int write, struct file *filp,
2435 void __user *buffer, size_t *lenp, loff_t *ppos)
2436{
2437 return -ENOSYS;
2438}
2439
2440int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
2441 void __user *buffer, size_t *lenp, loff_t *ppos)
2442{
2443 return -ENOSYS;
2444}
2445
2446int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
2447 void __user *buffer, size_t *lenp, loff_t *ppos)
2448{
2449 return -ENOSYS;
2450}
2451
2452int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2453 void __user *buffer, size_t *lenp, loff_t *ppos)
2454{
2455 return -ENOSYS;
2456}
2457
2458int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2459 void __user *buffer, size_t *lenp, loff_t *ppos)
2460{
2461 return -ENOSYS;
2462}
2463
2464int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2465 void __user *buffer, size_t *lenp, loff_t *ppos)
2466{
2467 return -ENOSYS;
2468}
2469
2470int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2471 void __user *buffer, size_t *lenp, loff_t *ppos)
2472{
2473 return -ENOSYS;
2474}
2475
2476int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2477 struct file *filp,
2478 void __user *buffer,
2479 size_t *lenp, loff_t *ppos)
2480{
2481 return -ENOSYS;
2482}
2483
2484struct ctl_table_header * register_sysctl_table(ctl_table * table,
2485 int insert_at_head)
2486{
2487 return NULL;
2488}
2489
/*
 * Stub in the "#else CONFIG_SYSCTL" branch: deliberately a no-op; the
 * header argument is not touched.
 */
void unregister_sysctl_table(struct ctl_table_header *table)
{
}
2493
2494#endif /* CONFIG_SYSCTL */
2495
/*
 * The exports are gathered here, after the CONFIG_SYSCTL conditional,
 * instead of following each definition: every one of these symbols is
 * defined twice (real implementation and stub), so exporting next to the
 * definition would mean writing each export twice.  Exception granted :-)
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);
EXPORT_SYMBOL(sysctl_string);
EXPORT_SYMBOL(unregister_sysctl_table);