Blame - kernel/sys.c - kernel/msm-4.9

blob: a74039036fb47cf9d27896477add07d4f41cb063 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* linux/kernel/sys.c
				3	*
				4	* Copyright (C) 1991, 1992 Linus Torvalds
				5	*/
				6
				7	#include <linux/config.h>
				8	#include <linux/module.h>
				9	#include <linux/mm.h>
				10	#include <linux/utsname.h>
				11	#include <linux/mman.h>
				12	#include <linux/smp_lock.h>
				13	#include <linux/notifier.h>
				14	#include <linux/reboot.h>
				15	#include <linux/prctl.h>
				16	#include <linux/init.h>
				17	#include <linux/highuid.h>
				18	#include <linux/fs.h>
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	19	#include <linux/kernel.h>
				20	#include <linux/kexec.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	21	#include <linux/workqueue.h>
				22	#include <linux/device.h>
				23	#include <linux/key.h>
				24	#include <linux/times.h>
				25	#include <linux/posix-timers.h>
				26	#include <linux/security.h>
				27	#include <linux/dcookies.h>
				28	#include <linux/suspend.h>
				29	#include <linux/tty.h>
Jesper Juhl	7ed20e1	2005-05-01 08:59:14 -0700	[diff] [blame]	30	#include <linux/signal.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	31
				32	#include <linux/compat.h>
				33	#include <linux/syscalls.h>
				34
				35	#include <asm/uaccess.h>
				36	#include <asm/io.h>
				37	#include <asm/unistd.h>
				38
				39	#ifndef SET_UNALIGN_CTL
				40	# define SET_UNALIGN_CTL(a,b) (-EINVAL)
				41	#endif
				42	#ifndef GET_UNALIGN_CTL
				43	# define GET_UNALIGN_CTL(a,b) (-EINVAL)
				44	#endif
				45	#ifndef SET_FPEMU_CTL
				46	# define SET_FPEMU_CTL(a,b) (-EINVAL)
				47	#endif
				48	#ifndef GET_FPEMU_CTL
				49	# define GET_FPEMU_CTL(a,b) (-EINVAL)
				50	#endif
				51	#ifndef SET_FPEXC_CTL
				52	# define SET_FPEXC_CTL(a,b) (-EINVAL)
				53	#endif
				54	#ifndef GET_FPEXC_CTL
				55	# define GET_FPEXC_CTL(a,b) (-EINVAL)
				56	#endif
				57
				58	/*
				59	* this is where the system-wide overflow UID and GID are defined, for
				60	* architectures that now have 32-bit UID/GID but didn't in the past
				61	*/
				62
				63	int overflowuid = DEFAULT_OVERFLOWUID;
				64	int overflowgid = DEFAULT_OVERFLOWGID;
				65
				66	#ifdef CONFIG_UID16
				67	EXPORT_SYMBOL(overflowuid);
				68	EXPORT_SYMBOL(overflowgid);
				69	#endif
				70
				71	/*
				72	* the same as above, but for filesystems which can only store a 16-bit
				73	* UID and GID. as such, this is needed on all architectures
				74	*/
				75
				76	int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
				77	int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;
				78
				79	EXPORT_SYMBOL(fs_overflowuid);
				80	EXPORT_SYMBOL(fs_overflowgid);
				81
				82	/*
				83	* this indicates whether you can reboot with ctrl-alt-del: the default is yes
				84	*/
				85
				86	int C_A_D = 1;
				87	int cad_pid = 1;
				88
				89	/*
				90	* Notifier list for kernel code which wants to be called
				91	* at shutdown. This is used to stop any idling DMA operations
				92	* and the like.
				93	*/
				94
				95	static struct notifier_block *reboot_notifier_list;
				96	static DEFINE_RWLOCK(notifier_lock);
				97
				98	/**
				99	* notifier_chain_register - Add notifier to a notifier chain
				100	* @list: Pointer to root list pointer
				101	* @n: New entry in notifier chain
				102	*
				103	* Adds a notifier to a notifier chain.
				104	*
				105	* Currently always returns zero.
				106	*/
				107
				108	int notifier_chain_register(struct notifier_block *list, struct notifier_block n)
				109	{
				110	write_lock(&notifier_lock);
				111	while(*list)
				112	{
				113	if(n->priority > (*list)->priority)
				114	break;
				115	list= &((*list)->next);
				116	}
				117	n->next = *list;
				118	*list=n;
				119	write_unlock(&notifier_lock);
				120	return 0;
				121	}
				122
				123	EXPORT_SYMBOL(notifier_chain_register);
				124
				125	/**
				126	* notifier_chain_unregister - Remove notifier from a notifier chain
				127	* @nl: Pointer to root list pointer
				128	* @n: New entry in notifier chain
				129	*
				130	* Removes a notifier from a notifier chain.
				131	*
				132	* Returns zero on success, or %-ENOENT on failure.
				133	*/
				134
				135	int notifier_chain_unregister(struct notifier_block *nl, struct notifier_block n)
				136	{
				137	write_lock(&notifier_lock);
				138	while((*nl)!=NULL)
				139	{
				140	if((*nl)==n)
				141	{
				142	*nl=n->next;
				143	write_unlock(&notifier_lock);
				144	return 0;
				145	}
				146	nl=&((*nl)->next);
				147	}
				148	write_unlock(&notifier_lock);
				149	return -ENOENT;
				150	}
				151
				152	EXPORT_SYMBOL(notifier_chain_unregister);
				153
				154	/**
				155	* notifier_call_chain - Call functions in a notifier chain
				156	* @n: Pointer to root pointer of notifier chain
				157	* @val: Value passed unmodified to notifier function
				158	* @v: Pointer passed unmodified to notifier function
				159	*
				160	* Calls each function in a notifier chain in turn.
				161	*
				162	* If the return value of the notifier can be and'd
				163	* with %NOTIFY_STOP_MASK, then notifier_call_chain
				164	* will return immediately, with the return value of
				165	* the notifier function which halted execution.
				166	* Otherwise, the return value is the return value
				167	* of the last notifier function called.
				168	*/
				169
				170	int notifier_call_chain(struct notifier_block *n, unsigned long val, void v)
				171	{
				172	int ret=NOTIFY_DONE;
				173	struct notifier_block nb = n;
				174
				175	while(nb)
				176	{
				177	ret=nb->notifier_call(nb,val,v);
				178	if(ret&NOTIFY_STOP_MASK)
				179	{
				180	return ret;
				181	}
				182	nb=nb->next;
				183	}
				184	return ret;
				185	}
				186
				187	EXPORT_SYMBOL(notifier_call_chain);
				188
				189	/**
				190	* register_reboot_notifier - Register function to be called at reboot time
				191	* @nb: Info about notifier function to be called
				192	*
				193	* Registers a function with the list of functions
				194	* to be called at reboot time.
				195	*
				196	* Currently always returns zero, as notifier_chain_register
				197	* always returns zero.
				198	*/
				199
				200	int register_reboot_notifier(struct notifier_block * nb)
				201	{
				202	return notifier_chain_register(&reboot_notifier_list, nb);
				203	}
				204
				205	EXPORT_SYMBOL(register_reboot_notifier);
				206
				207	/**
				208	* unregister_reboot_notifier - Unregister previously registered reboot notifier
				209	* @nb: Hook to be unregistered
				210	*
				211	* Unregisters a previously registered reboot
				212	* notifier function.
				213	*
				214	* Returns zero on success, or %-ENOENT on failure.
				215	*/
				216
				217	int unregister_reboot_notifier(struct notifier_block * nb)
				218	{
				219	return notifier_chain_unregister(&reboot_notifier_list, nb);
				220	}
				221
				222	EXPORT_SYMBOL(unregister_reboot_notifier);
				223
				224	static int set_one_prio(struct task_struct *p, int niceval, int error)
				225	{
				226	int no_nice;
				227
				228	if (p->uid != current->euid &&
				229	p->euid != current->euid && !capable(CAP_SYS_NICE)) {
				230	error = -EPERM;
				231	goto out;
				232	}
Matt Mackall	e43379f	2005-05-01 08:59:00 -0700	[diff] [blame]	233	if (niceval < task_nice(p) && !can_nice(p, niceval)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	234	error = -EACCES;
				235	goto out;
				236	}
				237	no_nice = security_task_setnice(p, niceval);
				238	if (no_nice) {
				239	error = no_nice;
				240	goto out;
				241	}
				242	if (error == -ESRCH)
				243	error = 0;
				244	set_user_nice(p, niceval);
				245	out:
				246	return error;
				247	}
				248
				249	asmlinkage long sys_setpriority(int which, int who, int niceval)
				250	{
				251	struct task_struct g, p;
				252	struct user_struct *user;
				253	int error = -EINVAL;
				254
				255	if (which > 2 \|\| which < 0)
				256	goto out;
				257
				258	/* normalize: avoid signed division (rounding problems) */
				259	error = -ESRCH;
				260	if (niceval < -20)
				261	niceval = -20;
				262	if (niceval > 19)
				263	niceval = 19;
				264
				265	read_lock(&tasklist_lock);
				266	switch (which) {
				267	case PRIO_PROCESS:
				268	if (!who)
				269	who = current->pid;
				270	p = find_task_by_pid(who);
				271	if (p)
				272	error = set_one_prio(p, niceval, error);
				273	break;
				274	case PRIO_PGRP:
				275	if (!who)
				276	who = process_group(current);
				277	do_each_task_pid(who, PIDTYPE_PGID, p) {
				278	error = set_one_prio(p, niceval, error);
				279	} while_each_task_pid(who, PIDTYPE_PGID, p);
				280	break;
				281	case PRIO_USER:
				282	user = current->user;
				283	if (!who)
				284	who = current->uid;
				285	else
				286	if ((who != current->uid) && !(user = find_user(who)))
				287	goto out_unlock; /* No processes for this user */
				288
				289	do_each_thread(g, p)
				290	if (p->uid == who)
				291	error = set_one_prio(p, niceval, error);
				292	while_each_thread(g, p);
				293	if (who != current->uid)
				294	free_uid(user); /* For find_user() */
				295	break;
				296	}
				297	out_unlock:
				298	read_unlock(&tasklist_lock);
				299	out:
				300	return error;
				301	}
				302
				303	/*
				304	* Ugh. To avoid negative return values, "getpriority()" will
				305	* not return the normal nice-value, but a negated value that
				306	* has been offset by 20 (ie it returns 40..1 instead of -20..19)
				307	* to stay compatible.
				308	*/
				309	asmlinkage long sys_getpriority(int which, int who)
				310	{
				311	struct task_struct g, p;
				312	struct user_struct *user;
				313	long niceval, retval = -ESRCH;
				314
				315	if (which > 2 \|\| which < 0)
				316	return -EINVAL;
				317
				318	read_lock(&tasklist_lock);
				319	switch (which) {
				320	case PRIO_PROCESS:
				321	if (!who)
				322	who = current->pid;
				323	p = find_task_by_pid(who);
				324	if (p) {
				325	niceval = 20 - task_nice(p);
				326	if (niceval > retval)
				327	retval = niceval;
				328	}
				329	break;
				330	case PRIO_PGRP:
				331	if (!who)
				332	who = process_group(current);
				333	do_each_task_pid(who, PIDTYPE_PGID, p) {
				334	niceval = 20 - task_nice(p);
				335	if (niceval > retval)
				336	retval = niceval;
				337	} while_each_task_pid(who, PIDTYPE_PGID, p);
				338	break;
				339	case PRIO_USER:
				340	user = current->user;
				341	if (!who)
				342	who = current->uid;
				343	else
				344	if ((who != current->uid) && !(user = find_user(who)))
				345	goto out_unlock; /* No processes for this user */
				346
				347	do_each_thread(g, p)
				348	if (p->uid == who) {
				349	niceval = 20 - task_nice(p);
				350	if (niceval > retval)
				351	retval = niceval;
				352	}
				353	while_each_thread(g, p);
				354	if (who != current->uid)
				355	free_uid(user); /* for find_user() */
				356	break;
				357	}
				358	out_unlock:
				359	read_unlock(&tasklist_lock);
				360
				361	return retval;
				362	}
				363
/*
 * emergency_restart - hand straight over to machine_emergency_restart().
 *
 * Unlike kernel_restart() in this file, no notifier chain is run and no
 * device is suspended or shut down here.
 */
void emergency_restart(void)
{
	machine_emergency_restart();
}
EXPORT_SYMBOL_GPL(emergency_restart);
				369
Eric W. Biederman	4a00ea1	2005-07-26 11:24:14 -0600	[diff] [blame]	370	void kernel_restart(char *cmd)
				371	{
				372	notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
				373	system_state = SYSTEM_RESTART;
				374	device_suspend(PMSG_FREEZE);
				375	device_shutdown();
				376	if (!cmd) {
				377	printk(KERN_EMERG "Restarting system.\n");
				378	} else {
				379	printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
				380	}
				381	printk(".\n");
				382	machine_restart(cmd);
				383	}
				384	EXPORT_SYMBOL_GPL(kernel_restart);
				385
				386	void kernel_kexec(void)
				387	{
				388	#ifdef CONFIG_KEXEC
				389	struct kimage *image;
				390	image = xchg(&kexec_image, 0);
				391	if (!image) {
				392	return;
				393	}
				394	notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
				395	system_state = SYSTEM_RESTART;
				396	device_suspend(PMSG_FREEZE);
				397	device_shutdown();
				398	printk(KERN_EMERG "Starting new kernel\n");
				399	machine_shutdown();
				400	machine_kexec(image);
				401	#endif
				402	}
				403	EXPORT_SYMBOL_GPL(kernel_kexec);
				404
				405	void kernel_halt(void)
				406	{
				407	notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
				408	system_state = SYSTEM_HALT;
				409	device_suspend(PMSG_SUSPEND);
				410	device_shutdown();
				411	printk(KERN_EMERG "System halted.\n");
				412	machine_halt();
				413	}
				414	EXPORT_SYMBOL_GPL(kernel_halt);
				415
				416	void kernel_power_off(void)
				417	{
				418	notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
				419	system_state = SYSTEM_POWER_OFF;
				420	device_suspend(PMSG_SUSPEND);
				421	device_shutdown();
				422	printk(KERN_EMERG "Power down.\n");
				423	machine_power_off();
				424	}
				425	EXPORT_SYMBOL_GPL(kernel_power_off);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	426
				427	/*
				428	* Reboot system call: for obvious reasons only root may call it,
				429	* and even root needs to set up some magic numbers in the registers
				430	* so that some mistake won't make this reboot the whole machine.
				431	* You can also set the meaning of the ctrl-alt-del-key here.
				432	*
				433	* reboot doesn't sync: do that yourself before calling this.
				434	*/
				435	asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user * arg)
				436	{
				437	char buffer[256];
				438
				439	/* We only trust the superuser with rebooting the system. */
				440	if (!capable(CAP_SYS_BOOT))
				441	return -EPERM;
				442
				443	/* For safety, we require "magic" arguments. */
				444	if (magic1 != LINUX_REBOOT_MAGIC1 \|\|
				445	(magic2 != LINUX_REBOOT_MAGIC2 &&
				446	magic2 != LINUX_REBOOT_MAGIC2A &&
				447	magic2 != LINUX_REBOOT_MAGIC2B &&
				448	magic2 != LINUX_REBOOT_MAGIC2C))
				449	return -EINVAL;
				450
				451	lock_kernel();
				452	switch (cmd) {
				453	case LINUX_REBOOT_CMD_RESTART:
Eric W. Biederman	4a00ea1	2005-07-26 11:24:14 -0600	[diff] [blame]	454	kernel_restart(NULL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	455	break;
				456
				457	case LINUX_REBOOT_CMD_CAD_ON:
				458	C_A_D = 1;
				459	break;
				460
				461	case LINUX_REBOOT_CMD_CAD_OFF:
				462	C_A_D = 0;
				463	break;
				464
				465	case LINUX_REBOOT_CMD_HALT:
Eric W. Biederman	4a00ea1	2005-07-26 11:24:14 -0600	[diff] [blame]	466	kernel_halt();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	467	unlock_kernel();
				468	do_exit(0);
				469	break;
				470
				471	case LINUX_REBOOT_CMD_POWER_OFF:
Eric W. Biederman	4a00ea1	2005-07-26 11:24:14 -0600	[diff] [blame]	472	kernel_power_off();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	473	unlock_kernel();
				474	do_exit(0);
				475	break;
				476
				477	case LINUX_REBOOT_CMD_RESTART2:
				478	if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) {
				479	unlock_kernel();
				480	return -EFAULT;
				481	}
				482	buffer[sizeof(buffer) - 1] = '\0';
				483
Eric W. Biederman	4a00ea1	2005-07-26 11:24:14 -0600	[diff] [blame]	484	kernel_restart(buffer);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	485	break;
				486
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	487	case LINUX_REBOOT_CMD_KEXEC:
Eric W. Biederman	4a00ea1	2005-07-26 11:24:14 -0600	[diff] [blame]	488	kernel_kexec();
				489	unlock_kernel();
				490	return -EINVAL;
				491
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	492	#ifdef CONFIG_SOFTWARE_SUSPEND
				493	case LINUX_REBOOT_CMD_SW_SUSPEND:
				494	{
				495	int ret = software_suspend();
				496	unlock_kernel();
				497	return ret;
				498	}
				499	#endif
				500
				501	default:
				502	unlock_kernel();
				503	return -EINVAL;
				504	}
				505	unlock_kernel();
				506	return 0;
				507	}
				508
				509	static void deferred_cad(void *dummy)
				510	{
Eric W. Biederman	abcd9e5	2005-07-26 11:27:34 -0600	[diff] [blame]	511	kernel_restart(NULL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	512	}
				513
				514	/*
				515	* This function gets called by ctrl-alt-del - ie the keyboard interrupt.
				516	* As it's called within an interrupt, it may NOT sync: the only choice
				517	* is whether to reboot at once, or just ignore the ctrl-alt-del.
				518	*/
				519	void ctrl_alt_del(void)
				520	{
				521	static DECLARE_WORK(cad_work, deferred_cad, NULL);
				522
				523	if (C_A_D)
				524	schedule_work(&cad_work);
				525	else
				526	kill_proc(cad_pid, SIGINT, 1);
				527	}
				528
				529
				530	/*
				531	* Unprivileged users may change the real gid to the effective gid
				532	* or vice versa. (BSD-style)
				533	*
				534	* If you set the real gid at all, or set the effective gid to a value not
				535	* equal to the real gid, then the saved gid is set to the new effective gid.
				536	*
				537	* This makes it possible for a setgid program to completely drop its
				538	* privileges, which is often a useful assertion to make when you are doing
				539	* a security audit over a program.
				540	*
				541	* The general idea is that a program which uses just setregid() will be
				542	* 100% compatible with BSD. A program which uses just setgid() will be
				543	* 100% compatible with POSIX with saved IDs.
				544	*
				545	* SMP: There are not races, the GIDs are checked only by filesystem
				546	* operations (as far as semantic preservation is concerned).
				547	*/
				548	asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
				549	{
				550	int old_rgid = current->gid;
				551	int old_egid = current->egid;
				552	int new_rgid = old_rgid;
				553	int new_egid = old_egid;
				554	int retval;
				555
				556	retval = security_task_setgid(rgid, egid, (gid_t)-1, LSM_SETID_RE);
				557	if (retval)
				558	return retval;
				559
				560	if (rgid != (gid_t) -1) {
				561	if ((old_rgid == rgid) \|\|
				562	(current->egid==rgid) \|\|
				563	capable(CAP_SETGID))
				564	new_rgid = rgid;
				565	else
				566	return -EPERM;
				567	}
				568	if (egid != (gid_t) -1) {
				569	if ((old_rgid == egid) \|\|
				570	(current->egid == egid) \|\|
				571	(current->sgid == egid) \|\|
				572	capable(CAP_SETGID))
				573	new_egid = egid;
				574	else {
				575	return -EPERM;
				576	}
				577	}
				578	if (new_egid != old_egid)
				579	{
Alan Cox	d6e7114	2005-06-23 00:09:43 -0700	[diff] [blame]	580	current->mm->dumpable = suid_dumpable;
akpm@osdl.org	d59dd46	2005-05-01 08:58:47 -0700	[diff] [blame]	581	smp_wmb();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	582	}
				583	if (rgid != (gid_t) -1 \|\|
				584	(egid != (gid_t) -1 && egid != old_rgid))
				585	current->sgid = new_egid;
				586	current->fsgid = new_egid;
				587	current->egid = new_egid;
				588	current->gid = new_rgid;
				589	key_fsgid_changed(current);
				590	return 0;
				591	}
				592
				593	/*
				594	* setgid() is implemented like SysV w/ SAVED_IDS
				595	*
				596	* SMP: Same implicit races as above.
				597	*/
				598	asmlinkage long sys_setgid(gid_t gid)
				599	{
				600	int old_egid = current->egid;
				601	int retval;
				602
				603	retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID);
				604	if (retval)
				605	return retval;
				606
				607	if (capable(CAP_SETGID))
				608	{
				609	if(old_egid != gid)
				610	{
Alan Cox	d6e7114	2005-06-23 00:09:43 -0700	[diff] [blame]	611	current->mm->dumpable = suid_dumpable;
akpm@osdl.org	d59dd46	2005-05-01 08:58:47 -0700	[diff] [blame]	612	smp_wmb();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	613	}
				614	current->gid = current->egid = current->sgid = current->fsgid = gid;
				615	}
				616	else if ((gid == current->gid) \|\| (gid == current->sgid))
				617	{
				618	if(old_egid != gid)
				619	{
Alan Cox	d6e7114	2005-06-23 00:09:43 -0700	[diff] [blame]	620	current->mm->dumpable = suid_dumpable;
akpm@osdl.org	d59dd46	2005-05-01 08:58:47 -0700	[diff] [blame]	621	smp_wmb();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	622	}
				623	current->egid = current->fsgid = gid;
				624	}
				625	else
				626	return -EPERM;
				627
				628	key_fsgid_changed(current);
				629	return 0;
				630	}
				631
				632	static int set_user(uid_t new_ruid, int dumpclear)
				633	{
				634	struct user_struct *new_user;
				635
				636	new_user = alloc_uid(new_ruid);
				637	if (!new_user)
				638	return -EAGAIN;
				639
				640	if (atomic_read(&new_user->processes) >=
				641	current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
				642	new_user != &root_user) {
				643	free_uid(new_user);
				644	return -EAGAIN;
				645	}
				646
				647	switch_uid(new_user);
				648
				649	if(dumpclear)
				650	{
Alan Cox	d6e7114	2005-06-23 00:09:43 -0700	[diff] [blame]	651	current->mm->dumpable = suid_dumpable;
akpm@osdl.org	d59dd46	2005-05-01 08:58:47 -0700	[diff] [blame]	652	smp_wmb();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	653	}
				654	current->uid = new_ruid;
				655	return 0;
				656	}
				657
				658	/*
				659	* Unprivileged users may change the real uid to the effective uid
				660	* or vice versa. (BSD-style)
				661	*
				662	* If you set the real uid at all, or set the effective uid to a value not
				663	* equal to the real uid, then the saved uid is set to the new effective uid.
				664	*
				665	* This makes it possible for a setuid program to completely drop its
				666	* privileges, which is often a useful assertion to make when you are doing
				667	* a security audit over a program.
				668	*
				669	* The general idea is that a program which uses just setreuid() will be
				670	* 100% compatible with BSD. A program which uses just setuid() will be
				671	* 100% compatible with POSIX with saved IDs.
				672	*/
				673	asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
				674	{
				675	int old_ruid, old_euid, old_suid, new_ruid, new_euid;
				676	int retval;
				677
				678	retval = security_task_setuid(ruid, euid, (uid_t)-1, LSM_SETID_RE);
				679	if (retval)
				680	return retval;
				681
				682	new_ruid = old_ruid = current->uid;
				683	new_euid = old_euid = current->euid;
				684	old_suid = current->suid;
				685
				686	if (ruid != (uid_t) -1) {
				687	new_ruid = ruid;
				688	if ((old_ruid != ruid) &&
				689	(current->euid != ruid) &&
				690	!capable(CAP_SETUID))
				691	return -EPERM;
				692	}
				693
				694	if (euid != (uid_t) -1) {
				695	new_euid = euid;
				696	if ((old_ruid != euid) &&
				697	(current->euid != euid) &&
				698	(current->suid != euid) &&
				699	!capable(CAP_SETUID))
				700	return -EPERM;
				701	}
				702
				703	if (new_ruid != old_ruid && set_user(new_ruid, new_euid != old_euid) < 0)
				704	return -EAGAIN;
				705
				706	if (new_euid != old_euid)
				707	{
Alan Cox	d6e7114	2005-06-23 00:09:43 -0700	[diff] [blame]	708	current->mm->dumpable = suid_dumpable;
akpm@osdl.org	d59dd46	2005-05-01 08:58:47 -0700	[diff] [blame]	709	smp_wmb();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	710	}
				711	current->fsuid = current->euid = new_euid;
				712	if (ruid != (uid_t) -1 \|\|
				713	(euid != (uid_t) -1 && euid != old_ruid))
				714	current->suid = current->euid;
				715	current->fsuid = current->euid;
				716
				717	key_fsuid_changed(current);
				718
				719	return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE);
				720	}
				721
				722
				723
				724	/*
				725	* setuid() is implemented like SysV with SAVED_IDS
				726	*
				727	* Note that SAVED_ID's is deficient in that a setuid root program
				728	* like sendmail, for example, cannot set its uid to be a normal
				729	* user and then switch back, because if you're root, setuid() sets
				730	* the saved uid too. If you don't like this, blame the bright people
				731	* in the POSIX committee and/or USG. Note that the BSD-style setreuid()
				732	* will allow a root program to temporarily drop privileges and be able to
				733	* regain them by swapping the real and effective uid.
				734	*/
				735	asmlinkage long sys_setuid(uid_t uid)
				736	{
				737	int old_euid = current->euid;
				738	int old_ruid, old_suid, new_ruid, new_suid;
				739	int retval;
				740
				741	retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID);
				742	if (retval)
				743	return retval;
				744
				745	old_ruid = new_ruid = current->uid;
				746	old_suid = current->suid;
				747	new_suid = old_suid;
				748
				749	if (capable(CAP_SETUID)) {
				750	if (uid != old_ruid && set_user(uid, old_euid != uid) < 0)
				751	return -EAGAIN;
				752	new_suid = uid;
				753	} else if ((uid != current->uid) && (uid != new_suid))
				754	return -EPERM;
				755
				756	if (old_euid != uid)
				757	{
Alan Cox	d6e7114	2005-06-23 00:09:43 -0700	[diff] [blame]	758	current->mm->dumpable = suid_dumpable;
akpm@osdl.org	d59dd46	2005-05-01 08:58:47 -0700	[diff] [blame]	759	smp_wmb();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	760	}
				761	current->fsuid = current->euid = uid;
				762	current->suid = new_suid;
				763
				764	key_fsuid_changed(current);
				765
				766	return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID);
				767	}
				768
				769
				770	/*
				771	* This function implements a generic ability to update ruid, euid,
				772	* and suid. This allows you to implement the 4.4 compatible seteuid().
				773	*/
				774	asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
				775	{
				776	int old_ruid = current->uid;
				777	int old_euid = current->euid;
				778	int old_suid = current->suid;
				779	int retval;
				780
				781	retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES);
				782	if (retval)
				783	return retval;
				784
				785	if (!capable(CAP_SETUID)) {
				786	if ((ruid != (uid_t) -1) && (ruid != current->uid) &&
				787	(ruid != current->euid) && (ruid != current->suid))
				788	return -EPERM;
				789	if ((euid != (uid_t) -1) && (euid != current->uid) &&
				790	(euid != current->euid) && (euid != current->suid))
				791	return -EPERM;
				792	if ((suid != (uid_t) -1) && (suid != current->uid) &&
				793	(suid != current->euid) && (suid != current->suid))
				794	return -EPERM;
				795	}
				796	if (ruid != (uid_t) -1) {
				797	if (ruid != current->uid && set_user(ruid, euid != current->euid) < 0)
				798	return -EAGAIN;
				799	}
				800	if (euid != (uid_t) -1) {
				801	if (euid != current->euid)
				802	{
Alan Cox	d6e7114	2005-06-23 00:09:43 -0700	[diff] [blame]	803	current->mm->dumpable = suid_dumpable;
akpm@osdl.org	d59dd46	2005-05-01 08:58:47 -0700	[diff] [blame]	804	smp_wmb();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	805	}
				806	current->euid = euid;
				807	}
				808	current->fsuid = current->euid;
				809	if (suid != (uid_t) -1)
				810	current->suid = suid;
				811
				812	key_fsuid_changed(current);
				813
				814	return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES);
				815	}
				816
				817	asmlinkage long sys_getresuid(uid_t __user ruid, uid_t __user euid, uid_t __user *suid)
				818	{
				819	int retval;
				820
				821	if (!(retval = put_user(current->uid, ruid)) &&
				822	!(retval = put_user(current->euid, euid)))
				823	retval = put_user(current->suid, suid);
				824
				825	return retval;
				826	}
				827
				828	/*
				829	* Same as above, but for rgid, egid, sgid.
				830	*/
				831	asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
				832	{
				833	int retval;
				834
				835	retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES);
				836	if (retval)
				837	return retval;
				838
				839	if (!capable(CAP_SETGID)) {
				840	if ((rgid != (gid_t) -1) && (rgid != current->gid) &&
				841	(rgid != current->egid) && (rgid != current->sgid))
				842	return -EPERM;
				843	if ((egid != (gid_t) -1) && (egid != current->gid) &&
				844	(egid != current->egid) && (egid != current->sgid))
				845	return -EPERM;
				846	if ((sgid != (gid_t) -1) && (sgid != current->gid) &&
				847	(sgid != current->egid) && (sgid != current->sgid))
				848	return -EPERM;
				849	}
				850	if (egid != (gid_t) -1) {
				851	if (egid != current->egid)
				852	{
Alan Cox	d6e7114	2005-06-23 00:09:43 -0700	[diff] [blame]	853	current->mm->dumpable = suid_dumpable;
akpm@osdl.org	d59dd46	2005-05-01 08:58:47 -0700	[diff] [blame]	854	smp_wmb();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	855	}
				856	current->egid = egid;
				857	}
				858	current->fsgid = current->egid;
				859	if (rgid != (gid_t) -1)
				860	current->gid = rgid;
				861	if (sgid != (gid_t) -1)
				862	current->sgid = sgid;
				863
				864	key_fsgid_changed(current);
				865	return 0;
				866	}
				867
				868	asmlinkage long sys_getresgid(gid_t __user rgid, gid_t __user egid, gid_t __user *sgid)
				869	{
				870	int retval;
				871
				872	if (!(retval = put_user(current->gid, rgid)) &&
				873	!(retval = put_user(current->egid, egid)))
				874	retval = put_user(current->sgid, sgid);
				875
				876	return retval;
				877	}
				878
				879
				880	/*
				881	* "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
				882	* is used for "access()" and for the NFS daemon (letting nfsd stay at
				883	* whatever uid it wants to). It normally shadows "euid", except when
				884	* explicitly set by setfsuid() or for access..
				885	*/
				886	asmlinkage long sys_setfsuid(uid_t uid)
				887	{
				888	int old_fsuid;
				889
				890	old_fsuid = current->fsuid;
				891	if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS))
				892	return old_fsuid;
				893
				894	if (uid == current->uid \|\| uid == current->euid \|\|
				895	uid == current->suid \|\| uid == current->fsuid \|\|
				896	capable(CAP_SETUID))
				897	{
				898	if (uid != old_fsuid)
				899	{
Alan Cox	d6e7114	2005-06-23 00:09:43 -0700	[diff] [blame]	900	current->mm->dumpable = suid_dumpable;
akpm@osdl.org	d59dd46	2005-05-01 08:58:47 -0700	[diff] [blame]	901	smp_wmb();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	902	}
				903	current->fsuid = uid;
				904	}
				905
				906	key_fsuid_changed(current);
				907
				908	security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS);
				909
				910	return old_fsuid;
				911	}
				912
				913	/*
				914	* Samma på svenska..
				915	*/
				916	asmlinkage long sys_setfsgid(gid_t gid)
				917	{
				918	int old_fsgid;
				919
				920	old_fsgid = current->fsgid;
				921	if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS))
				922	return old_fsgid;
				923
				924	if (gid == current->gid \|\| gid == current->egid \|\|
				925	gid == current->sgid \|\| gid == current->fsgid \|\|
				926	capable(CAP_SETGID))
				927	{
				928	if (gid != old_fsgid)
				929	{
Alan Cox	d6e7114	2005-06-23 00:09:43 -0700	[diff] [blame]	930	current->mm->dumpable = suid_dumpable;
akpm@osdl.org	d59dd46	2005-05-01 08:58:47 -0700	[diff] [blame]	931	smp_wmb();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	932	}
				933	current->fsgid = gid;
				934	key_fsgid_changed(current);
				935	}
				936	return old_fsgid;
				937	}
				938
				939	asmlinkage long sys_times(struct tms __user * tbuf)
				940	{
				941	/*
				942	* In the SMP world we might just be unlucky and have one of
				943	* the times increment as we use it. Since the value is an
				944	* atomically safe type this is just fine. Conceptually its
				945	* as if the syscall took an instant longer to occur.
				946	*/
				947	if (tbuf) {
				948	struct tms tmp;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	949	cputime_t utime, stime, cutime, cstime;
				950
Christoph Lameter	71a2224	2005-06-23 00:10:05 -0700	[diff] [blame]	951	#ifdef CONFIG_SMP
				952	if (thread_group_empty(current)) {
				953	/*
				954	* Single thread case without the use of any locks.
				955	*
				956	* We may race with release_task if two threads are
				957	* executing. However, release task first adds up the
				958	* counters (__exit_signal) before removing the task
				959	* from the process tasklist (__unhash_process).
				960	* __exit_signal also acquires and releases the
				961	* siglock which results in the proper memory ordering
				962	* so that the list modifications are always visible
				963	* after the counters have been updated.
				964	*
				965	* If the counters have been updated by the second thread
				966	* but the thread has not yet been removed from the list
				967	* then the other branch will be executing which will
				968	* block on tasklist_lock until the exit handling of the
				969	* other task is finished.
				970	*
				971	* This also implies that the sighand->siglock cannot
				972	* be held by another processor. So we can also
				973	* skip acquiring that lock.
				974	*/
				975	utime = cputime_add(current->signal->utime, current->utime);
				976	stime = cputime_add(current->signal->utime, current->stime);
				977	cutime = current->signal->cutime;
				978	cstime = current->signal->cstime;
				979	} else
				980	#endif
				981	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	982
Christoph Lameter	71a2224	2005-06-23 00:10:05 -0700	[diff] [blame]	983	/* Process with multiple threads */
				984	struct task_struct *tsk = current;
				985	struct task_struct *t;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	986
Christoph Lameter	71a2224	2005-06-23 00:10:05 -0700	[diff] [blame]	987	read_lock(&tasklist_lock);
				988	utime = tsk->signal->utime;
				989	stime = tsk->signal->stime;
				990	t = tsk;
				991	do {
				992	utime = cputime_add(utime, t->utime);
				993	stime = cputime_add(stime, t->stime);
				994	t = next_thread(t);
				995	} while (t != tsk);
				996
				997	/*
				998	* While we have tasklist_lock read-locked, no dying thread
				999	* can be updating current->signal->[us]time. Instead,
				1000	* we got their counts included in the live thread loop.
				1001	* However, another thread can come in right now and
				1002	* do a wait call that updates current->signal->c[us]time.
				1003	* To make sure we always see that pair updated atomically,
				1004	* we take the siglock around fetching them.
				1005	*/
				1006	spin_lock_irq(&tsk->sighand->siglock);
				1007	cutime = tsk->signal->cutime;
				1008	cstime = tsk->signal->cstime;
				1009	spin_unlock_irq(&tsk->sighand->siglock);
				1010	read_unlock(&tasklist_lock);
				1011	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1012	tmp.tms_utime = cputime_to_clock_t(utime);
				1013	tmp.tms_stime = cputime_to_clock_t(stime);
				1014	tmp.tms_cutime = cputime_to_clock_t(cutime);
				1015	tmp.tms_cstime = cputime_to_clock_t(cstime);
				1016	if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
				1017	return -EFAULT;
				1018	}
				1019	return (long) jiffies_64_to_clock_t(get_jiffies_64());
				1020	}
				1021
				1022	/*
				1023	* This needs some heavy checking ...
				1024	* I just haven't the stomach for it. I also don't fully
				1025	* understand sessions/pgrp etc. Let somebody who does explain it.
				1026	*
				1027	* OK, I think I have the protection semantics right.... this is really
				1028	* only important on a multi-user system anyway, to make sure one user
				1029	* can't send a signal to a process owned by another. -TYT, 12/12/91
				1030	*
				1031	* Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
				1032	* LBT 04.03.94
				1033	*/
				1034
				1035	asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
				1036	{
				1037	struct task_struct *p;
				1038	int err = -EINVAL;
				1039
				1040	if (!pid)
				1041	pid = current->pid;
				1042	if (!pgid)
				1043	pgid = pid;
				1044	if (pgid < 0)
				1045	return -EINVAL;
				1046
				1047	/* From this point forward we keep holding onto the tasklist lock
				1048	* so that our parent does not change from under us. -DaveM
				1049	*/
				1050	write_lock_irq(&tasklist_lock);
				1051
				1052	err = -ESRCH;
				1053	p = find_task_by_pid(pid);
				1054	if (!p)
				1055	goto out;
				1056
				1057	err = -EINVAL;
				1058	if (!thread_group_leader(p))
				1059	goto out;
				1060
				1061	if (p->parent == current \|\| p->real_parent == current) {
				1062	err = -EPERM;
				1063	if (p->signal->session != current->signal->session)
				1064	goto out;
				1065	err = -EACCES;
				1066	if (p->did_exec)
				1067	goto out;
				1068	} else {
				1069	err = -ESRCH;
				1070	if (p != current)
				1071	goto out;
				1072	}
				1073
				1074	err = -EPERM;
				1075	if (p->signal->leader)
				1076	goto out;
				1077
				1078	if (pgid != pid) {
				1079	struct task_struct *p;
				1080
				1081	do_each_task_pid(pgid, PIDTYPE_PGID, p) {
				1082	if (p->signal->session == current->signal->session)
				1083	goto ok_pgid;
				1084	} while_each_task_pid(pgid, PIDTYPE_PGID, p);
				1085	goto out;
				1086	}
				1087
				1088	ok_pgid:
				1089	err = security_task_setpgid(p, pgid);
				1090	if (err)
				1091	goto out;
				1092
				1093	if (process_group(p) != pgid) {
				1094	detach_pid(p, PIDTYPE_PGID);
				1095	p->signal->pgrp = pgid;
				1096	attach_pid(p, PIDTYPE_PGID, pgid);
				1097	}
				1098
				1099	err = 0;
				1100	out:
				1101	/* All paths lead to here, thus we are safe. -DaveM */
				1102	write_unlock_irq(&tasklist_lock);
				1103	return err;
				1104	}
				1105
				1106	asmlinkage long sys_getpgid(pid_t pid)
				1107	{
				1108	if (!pid) {
				1109	return process_group(current);
				1110	} else {
				1111	int retval;
				1112	struct task_struct *p;
				1113
				1114	read_lock(&tasklist_lock);
				1115	p = find_task_by_pid(pid);
				1116
				1117	retval = -ESRCH;
				1118	if (p) {
				1119	retval = security_task_getpgid(p);
				1120	if (!retval)
				1121	retval = process_group(p);
				1122	}
				1123	read_unlock(&tasklist_lock);
				1124	return retval;
				1125	}
				1126	}
				1127
				#ifdef __ARCH_WANT_SYS_GETPGRP

				/*
				 * sys_getpgrp - return the caller's process group.
				 * Only built when the architecture defines __ARCH_WANT_SYS_GETPGRP.
				 */
				asmlinkage long sys_getpgrp(void)
				{
					/* SMP - assuming writes are word atomic this is fine */
					long pgrp = process_group(current);

					return pgrp;
				}

				#endif
				1137
				1138	asmlinkage long sys_getsid(pid_t pid)
				1139	{
				1140	if (!pid) {
				1141	return current->signal->session;
				1142	} else {
				1143	int retval;
				1144	struct task_struct *p;
				1145
				1146	read_lock(&tasklist_lock);
				1147	p = find_task_by_pid(pid);
				1148
				1149	retval = -ESRCH;
				1150	if(p) {
				1151	retval = security_task_getsid(p);
				1152	if (!retval)
				1153	retval = p->signal->session;
				1154	}
				1155	read_unlock(&tasklist_lock);
				1156	return retval;
				1157	}
				1158	}
				1159
				1160	asmlinkage long sys_setsid(void)
				1161	{
				1162	struct pid *pid;
				1163	int err = -EPERM;
				1164
				1165	if (!thread_group_leader(current))
				1166	return -EINVAL;
				1167
				1168	down(&tty_sem);
				1169	write_lock_irq(&tasklist_lock);
				1170
				1171	pid = find_pid(PIDTYPE_PGID, current->pid);
				1172	if (pid)
				1173	goto out;
				1174
				1175	current->signal->leader = 1;
				1176	__set_special_pids(current->pid, current->pid);
				1177	current->signal->tty = NULL;
				1178	current->signal->tty_old_pgrp = 0;
				1179	err = process_group(current);
				1180	out:
				1181	write_unlock_irq(&tasklist_lock);
				1182	up(&tty_sem);
				1183	return err;
				1184	}
				1185
				1186	/*
				1187	* Supplementary group IDs
				1188	*/
				1189
				1190	/* init to 2 - one for init_task, one to ensure it is never freed */
				1191	struct group_info init_groups = { .usage = ATOMIC_INIT(2) };
				1192
				1193	struct group_info *groups_alloc(int gidsetsize)
				1194	{
				1195	struct group_info *group_info;
				1196	int nblocks;
				1197	int i;
				1198
				1199	nblocks = (gidsetsize + NGROUPS_PER_BLOCK - 1) / NGROUPS_PER_BLOCK;
				1200	/* Make sure we always allocate at least one indirect block pointer */
				1201	nblocks = nblocks ? : 1;
				1202	group_info = kmalloc(sizeof(group_info) + nblockssizeof(gid_t *), GFP_USER);
				1203	if (!group_info)
				1204	return NULL;
				1205	group_info->ngroups = gidsetsize;
				1206	group_info->nblocks = nblocks;
				1207	atomic_set(&group_info->usage, 1);
				1208
				1209	if (gidsetsize <= NGROUPS_SMALL) {
				1210	group_info->blocks[0] = group_info->small_block;
				1211	} else {
				1212	for (i = 0; i < nblocks; i++) {
				1213	gid_t *b;
				1214	b = (void *)__get_free_page(GFP_USER);
				1215	if (!b)
				1216	goto out_undo_partial_alloc;
				1217	group_info->blocks[i] = b;
				1218	}
				1219	}
				1220	return group_info;
				1221
				1222	out_undo_partial_alloc:
				1223	while (--i >= 0) {
				1224	free_page((unsigned long)group_info->blocks[i]);
				1225	}
				1226	kfree(group_info);
				1227	return NULL;
				1228	}
				1229
				1230	EXPORT_SYMBOL(groups_alloc);
				1231
				1232	void groups_free(struct group_info *group_info)
				1233	{
				1234	if (group_info->blocks[0] != group_info->small_block) {
				1235	int i;
				1236	for (i = 0; i < group_info->nblocks; i++)
				1237	free_page((unsigned long)group_info->blocks[i]);
				1238	}
				1239	kfree(group_info);
				1240	}
				1241
				1242	EXPORT_SYMBOL(groups_free);
				1243
				1244	/* export the group_info to a user-space array */
				1245	static int groups_to_user(gid_t __user *grouplist,
				1246	struct group_info *group_info)
				1247	{
				1248	int i;
				1249	int count = group_info->ngroups;
				1250
				1251	for (i = 0; i < group_info->nblocks; i++) {
				1252	int cp_count = min(NGROUPS_PER_BLOCK, count);
				1253	int off = i * NGROUPS_PER_BLOCK;
				1254	int len = cp_count * sizeof(*grouplist);
				1255
				1256	if (copy_to_user(grouplist+off, group_info->blocks[i], len))
				1257	return -EFAULT;
				1258
				1259	count -= cp_count;
				1260	}
				1261	return 0;
				1262	}
				1263
				1264	/* fill a group_info from a user-space array - it must be allocated already */
				1265	static int groups_from_user(struct group_info *group_info,
				1266	gid_t __user *grouplist)
				1267	{
				1268	int i;
				1269	int count = group_info->ngroups;
				1270
				1271	for (i = 0; i < group_info->nblocks; i++) {
				1272	int cp_count = min(NGROUPS_PER_BLOCK, count);
				1273	int off = i * NGROUPS_PER_BLOCK;
				1274	int len = cp_count * sizeof(*grouplist);
				1275
				1276	if (copy_from_user(group_info->blocks[i], grouplist+off, len))
				1277	return -EFAULT;
				1278
				1279	count -= cp_count;
				1280	}
				1281	return 0;
				1282	}
				1283
Domen Puncer	ebe8b54	2005-05-05 16:16:19 -0700	[diff] [blame]	1284	/* a simple Shell sort */
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1285	static void groups_sort(struct group_info *group_info)
				1286	{
				1287	int base, max, stride;
				1288	int gidsetsize = group_info->ngroups;
				1289
				1290	for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
				1291	; /* nothing */
				1292	stride /= 3;
				1293
				1294	while (stride) {
				1295	max = gidsetsize - stride;
				1296	for (base = 0; base < max; base++) {
				1297	int left = base;
				1298	int right = left + stride;
				1299	gid_t tmp = GROUP_AT(group_info, right);
				1300
				1301	while (left >= 0 && GROUP_AT(group_info, left) > tmp) {
				1302	GROUP_AT(group_info, right) =
				1303	GROUP_AT(group_info, left);
				1304	right = left;
				1305	left -= stride;
				1306	}
				1307	GROUP_AT(group_info, right) = tmp;
				1308	}
				1309	stride /= 3;
				1310	}
				1311	}
				1312
				1313	/* a simple bsearch */
David Howells	3e30148	2005-06-23 22:00:56 -0700	[diff] [blame]	1314	int groups_search(struct group_info *group_info, gid_t grp)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1315	{
				1316	int left, right;
				1317
				1318	if (!group_info)
				1319	return 0;
				1320
				1321	left = 0;
				1322	right = group_info->ngroups;
				1323	while (left < right) {
				1324	int mid = (left+right)/2;
				1325	int cmp = grp - GROUP_AT(group_info, mid);
				1326	if (cmp > 0)
				1327	left = mid + 1;
				1328	else if (cmp < 0)
				1329	right = mid;
				1330	else
				1331	return 1;
				1332	}
				1333	return 0;
				1334	}
				1335
				1336	/* validate and set current->group_info */
				1337	int set_current_groups(struct group_info *group_info)
				1338	{
				1339	int retval;
				1340	struct group_info *old_info;
				1341
				1342	retval = security_task_setgroups(group_info);
				1343	if (retval)
				1344	return retval;
				1345
				1346	groups_sort(group_info);
				1347	get_group_info(group_info);
				1348
				1349	task_lock(current);
				1350	old_info = current->group_info;
				1351	current->group_info = group_info;
				1352	task_unlock(current);
				1353
				1354	put_group_info(old_info);
				1355
				1356	return 0;
				1357	}
				1358
				1359	EXPORT_SYMBOL(set_current_groups);
				1360
				1361	asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist)
				1362	{
				1363	int i = 0;
				1364
				1365	/*
				1366	* SMP: Nobody else can change our grouplist. Thus we are
				1367	* safe.
				1368	*/
				1369
				1370	if (gidsetsize < 0)
				1371	return -EINVAL;
				1372
				1373	/* no need to grab task_lock here; it cannot change */
				1374	get_group_info(current->group_info);
				1375	i = current->group_info->ngroups;
				1376	if (gidsetsize) {
				1377	if (i > gidsetsize) {
				1378	i = -EINVAL;
				1379	goto out;
				1380	}
				1381	if (groups_to_user(grouplist, current->group_info)) {
				1382	i = -EFAULT;
				1383	goto out;
				1384	}
				1385	}
				1386	out:
				1387	put_group_info(current->group_info);
				1388	return i;
				1389	}
				1390
				1391	/*
				1392	* SMP: Our groups are copy-on-write. We can set them safely
				1393	* without another task interfering.
				1394	*/
				1395
				1396	asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist)
				1397	{
				1398	struct group_info *group_info;
				1399	int retval;
				1400
				1401	if (!capable(CAP_SETGID))
				1402	return -EPERM;
				1403	if ((unsigned)gidsetsize > NGROUPS_MAX)
				1404	return -EINVAL;
				1405
				1406	group_info = groups_alloc(gidsetsize);
				1407	if (!group_info)
				1408	return -ENOMEM;
				1409	retval = groups_from_user(group_info, grouplist);
				1410	if (retval) {
				1411	put_group_info(group_info);
				1412	return retval;
				1413	}
				1414
				1415	retval = set_current_groups(group_info);
				1416	put_group_info(group_info);
				1417
				1418	return retval;
				1419	}
				1420
				1421	/*
				1422	* Check whether we're fsgid/egid or in the supplemental group..
				1423	*/
				1424	int in_group_p(gid_t grp)
				1425	{
				1426	int retval = 1;
				1427	if (grp != current->fsgid) {
				1428	get_group_info(current->group_info);
				1429	retval = groups_search(current->group_info, grp);
				1430	put_group_info(current->group_info);
				1431	}
				1432	return retval;
				1433	}
				1434
				1435	EXPORT_SYMBOL(in_group_p);
				1436
				1437	int in_egroup_p(gid_t grp)
				1438	{
				1439	int retval = 1;
				1440	if (grp != current->egid) {
				1441	get_group_info(current->group_info);
				1442	retval = groups_search(current->group_info, grp);
				1443	put_group_info(current->group_info);
				1444	}
				1445	return retval;
				1446	}
				1447
				1448	EXPORT_SYMBOL(in_egroup_p);
				1449
				/*
				 * Read/write semaphore held around accesses to system_utsname by the
				 * uname, hostname and domainname system calls in this file: readers
				 * take it with down_read(), the setters with down_write().
				 */
				DECLARE_RWSEM(uts_sem);

				EXPORT_SYMBOL(uts_sem);
				1453
				1454	asmlinkage long sys_newuname(struct new_utsname __user * name)
				1455	{
				1456	int errno = 0;
				1457
				1458	down_read(&uts_sem);
				1459	if (copy_to_user(name,&system_utsname,sizeof *name))
				1460	errno = -EFAULT;
				1461	up_read(&uts_sem);
				1462	return errno;
				1463	}
				1464
				1465	asmlinkage long sys_sethostname(char __user *name, int len)
				1466	{
				1467	int errno;
				1468	char tmp[__NEW_UTS_LEN];
				1469
				1470	if (!capable(CAP_SYS_ADMIN))
				1471	return -EPERM;
				1472	if (len < 0 \|\| len > __NEW_UTS_LEN)
				1473	return -EINVAL;
				1474	down_write(&uts_sem);
				1475	errno = -EFAULT;
				1476	if (!copy_from_user(tmp, name, len)) {
				1477	memcpy(system_utsname.nodename, tmp, len);
				1478	system_utsname.nodename[len] = 0;
				1479	errno = 0;
				1480	}
				1481	up_write(&uts_sem);
				1482	return errno;
				1483	}
				1484
				#ifdef __ARCH_WANT_SYS_GETHOSTNAME

				/*
				 * sys_gethostname - copy system_utsname.nodename to user space.
				 *
				 * At most @len bytes are copied; the terminating NUL is included only
				 * when it fits.  Returns 0 on success, -EINVAL if @len is negative, or
				 * -EFAULT if the copy fails.
				 */
				asmlinkage long sys_gethostname(char __user *name, int len)
				{
					int nbytes;
					int ret = 0;

					if (len < 0)
						return -EINVAL;

					down_read(&uts_sem);
					/* name plus its terminator, truncated to the caller's buffer */
					nbytes = 1 + strlen(system_utsname.nodename);
					if (nbytes > len)
						nbytes = len;
					if (copy_to_user(name, system_utsname.nodename, nbytes))
						ret = -EFAULT;
					up_read(&uts_sem);

					return ret;
				}

				#endif
				1505
				1506	/*
				1507	* Only setdomainname; getdomainname can be implemented by calling
				1508	* uname()
				1509	*/
				1510	asmlinkage long sys_setdomainname(char __user *name, int len)
				1511	{
				1512	int errno;
				1513	char tmp[__NEW_UTS_LEN];
				1514
				1515	if (!capable(CAP_SYS_ADMIN))
				1516	return -EPERM;
				1517	if (len < 0 \|\| len > __NEW_UTS_LEN)
				1518	return -EINVAL;
				1519
				1520	down_write(&uts_sem);
				1521	errno = -EFAULT;
				1522	if (!copy_from_user(tmp, name, len)) {
				1523	memcpy(system_utsname.domainname, tmp, len);
				1524	system_utsname.domainname[len] = 0;
				1525	errno = 0;
				1526	}
				1527	up_write(&uts_sem);
				1528	return errno;
				1529	}
				1530
				1531	asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim)
				1532	{
				1533	if (resource >= RLIM_NLIMITS)
				1534	return -EINVAL;
				1535	else {
				1536	struct rlimit value;
				1537	task_lock(current->group_leader);
				1538	value = current->signal->rlim[resource];
				1539	task_unlock(current->group_leader);
				1540	return copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
				1541	}
				1542	}
				1543
				#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT

				/*
				 *	Back compatibility for getrlimit. Needed for some apps.
				 *
				 *	Same as sys_getrlimit except that rlim_cur and rlim_max are
				 *	each capped at 0x7FFFFFFF before being copied out.
				 */

				asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim)
				{
					struct rlimit lim;

					if (resource >= RLIM_NLIMITS)
						return -EINVAL;

					task_lock(current->group_leader);
					lim = current->signal->rlim[resource];
					task_unlock(current->group_leader);

					if (lim.rlim_cur > 0x7FFFFFFF)
						lim.rlim_cur = 0x7FFFFFFF;
					if (lim.rlim_max > 0x7FFFFFFF)
						lim.rlim_max = 0x7FFFFFFF;

					if (copy_to_user(rlim, &lim, sizeof(lim)))
						return -EFAULT;
					return 0;
				}

				#endif
				1567
				1568	asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
				1569	{
				1570	struct rlimit new_rlim, *old_rlim;
				1571	int retval;
				1572
				1573	if (resource >= RLIM_NLIMITS)
				1574	return -EINVAL;
				1575	if(copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
				1576	return -EFAULT;
				1577	if (new_rlim.rlim_cur > new_rlim.rlim_max)
				1578	return -EINVAL;
				1579	old_rlim = current->signal->rlim + resource;
				1580	if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
				1581	!capable(CAP_SYS_RESOURCE))
				1582	return -EPERM;
				1583	if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN)
				1584	return -EPERM;
				1585
				1586	retval = security_task_setrlimit(resource, &new_rlim);
				1587	if (retval)
				1588	return retval;
				1589
				1590	task_lock(current->group_leader);
				1591	*old_rlim = new_rlim;
				1592	task_unlock(current->group_leader);
				1593
				1594	if (resource == RLIMIT_CPU && new_rlim.rlim_cur != RLIM_INFINITY &&
				1595	(cputime_eq(current->signal->it_prof_expires, cputime_zero) \|\|
				1596	new_rlim.rlim_cur <= cputime_to_secs(
				1597	current->signal->it_prof_expires))) {
				1598	cputime_t cputime = secs_to_cputime(new_rlim.rlim_cur);
				1599	read_lock(&tasklist_lock);
				1600	spin_lock_irq(&current->sighand->siglock);
				1601	set_process_cpu_timer(current, CPUCLOCK_PROF,
				1602	&cputime, NULL);
				1603	spin_unlock_irq(&current->sighand->siglock);
				1604	read_unlock(&tasklist_lock);
				1605	}
				1606
				1607	return 0;
				1608	}
				1609
				1610	/*
				1611	* It would make sense to put struct rusage in the task_struct,
				1612	* except that would make the task_struct be really big. After
				1613	* task_struct gets moved into malloc'ed memory, it would
				1614	* make sense to do this. It will make moving the rest of the information
				1615	* a lot simpler! (Which we're not doing right now because we're not
				1616	* measuring them yet).
				1617	*
				1618	* This expects to be called with tasklist_lock read-locked or better,
				1619	* and the siglock not locked. It may momentarily take the siglock.
				1620	*
				1621	* When sampling multiple threads for RUSAGE_SELF, under SMP we might have
				1622	* races with threads incrementing their own counters. But since word
				1623	* reads are atomic, we either get new values or old values and we don't
				1624	* care which for the sums. We always take the siglock to protect reading
				1625	* the c* fields from p->signal from races with exit.c updating those
				1626	* fields when reaping, so a sample either gets all the additions of a
				1627	* given child after it's reaped, or none so this sample is before reaping.
				1628	*/
				1629
				1630	static void k_getrusage(struct task_struct p, int who, struct rusage r)
				1631	{
				1632	struct task_struct *t;
				1633	unsigned long flags;
				1634	cputime_t utime, stime;
				1635
				1636	memset((char ) r, 0, sizeof r);
				1637
				1638	if (unlikely(!p->signal))
				1639	return;
				1640
				1641	switch (who) {
				1642	case RUSAGE_CHILDREN:
				1643	spin_lock_irqsave(&p->sighand->siglock, flags);
				1644	utime = p->signal->cutime;
				1645	stime = p->signal->cstime;
				1646	r->ru_nvcsw = p->signal->cnvcsw;
				1647	r->ru_nivcsw = p->signal->cnivcsw;
				1648	r->ru_minflt = p->signal->cmin_flt;
				1649	r->ru_majflt = p->signal->cmaj_flt;
				1650	spin_unlock_irqrestore(&p->sighand->siglock, flags);
				1651	cputime_to_timeval(utime, &r->ru_utime);
				1652	cputime_to_timeval(stime, &r->ru_stime);
				1653	break;
				1654	case RUSAGE_SELF:
				1655	spin_lock_irqsave(&p->sighand->siglock, flags);
				1656	utime = stime = cputime_zero;
				1657	goto sum_group;
				1658	case RUSAGE_BOTH:
				1659	spin_lock_irqsave(&p->sighand->siglock, flags);
				1660	utime = p->signal->cutime;
				1661	stime = p->signal->cstime;
				1662	r->ru_nvcsw = p->signal->cnvcsw;
				1663	r->ru_nivcsw = p->signal->cnivcsw;
				1664	r->ru_minflt = p->signal->cmin_flt;
				1665	r->ru_majflt = p->signal->cmaj_flt;
				1666	sum_group:
				1667	utime = cputime_add(utime, p->signal->utime);
				1668	stime = cputime_add(stime, p->signal->stime);
				1669	r->ru_nvcsw += p->signal->nvcsw;
				1670	r->ru_nivcsw += p->signal->nivcsw;
				1671	r->ru_minflt += p->signal->min_flt;
				1672	r->ru_majflt += p->signal->maj_flt;
				1673	t = p;
				1674	do {
				1675	utime = cputime_add(utime, t->utime);
				1676	stime = cputime_add(stime, t->stime);
				1677	r->ru_nvcsw += t->nvcsw;
				1678	r->ru_nivcsw += t->nivcsw;
				1679	r->ru_minflt += t->min_flt;
				1680	r->ru_majflt += t->maj_flt;
				1681	t = next_thread(t);
				1682	} while (t != p);
				1683	spin_unlock_irqrestore(&p->sighand->siglock, flags);
				1684	cputime_to_timeval(utime, &r->ru_utime);
				1685	cputime_to_timeval(stime, &r->ru_stime);
				1686	break;
				1687	default:
				1688	BUG();
				1689	}
				1690	}
				1691
				1692	int getrusage(struct task_struct p, int who, struct rusage __user ru)
				1693	{
				1694	struct rusage r;
				1695	read_lock(&tasklist_lock);
				1696	k_getrusage(p, who, &r);
				1697	read_unlock(&tasklist_lock);
				1698	return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
				1699	}
				1700
				1701	asmlinkage long sys_getrusage(int who, struct rusage __user *ru)
				1702	{
				1703	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
				1704	return -EINVAL;
				1705	return getrusage(current, who, ru);
				1706	}
				1707
				1708	asmlinkage long sys_umask(int mask)
				1709	{
				1710	mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
				1711	return mask;
				1712	}
				1713
				1714	asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
				1715	unsigned long arg4, unsigned long arg5)
				1716	{
				1717	long error;
				1718	int sig;
				1719
				1720	error = security_task_prctl(option, arg2, arg3, arg4, arg5);
				1721	if (error)
				1722	return error;
				1723
				1724	switch (option) {
				1725	case PR_SET_PDEATHSIG:
				1726	sig = arg2;
Jesper Juhl	7ed20e1	2005-05-01 08:59:14 -0700	[diff] [blame]	1727	if (!valid_signal(sig)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1728	error = -EINVAL;
				1729	break;
				1730	}
				1731	current->pdeath_signal = sig;
				1732	break;
				1733	case PR_GET_PDEATHSIG:
				1734	error = put_user(current->pdeath_signal, (int __user *)arg2);
				1735	break;
				1736	case PR_GET_DUMPABLE:
				1737	if (current->mm->dumpable)
				1738	error = 1;
				1739	break;
				1740	case PR_SET_DUMPABLE:
Alan Cox	d6e7114	2005-06-23 00:09:43 -0700	[diff] [blame]	1741	if (arg2 < 0 \|\| arg2 > 2) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1742	error = -EINVAL;
				1743	break;
				1744	}
				1745	current->mm->dumpable = arg2;
				1746	break;
				1747
				1748	case PR_SET_UNALIGN:
				1749	error = SET_UNALIGN_CTL(current, arg2);
				1750	break;
				1751	case PR_GET_UNALIGN:
				1752	error = GET_UNALIGN_CTL(current, arg2);
				1753	break;
				1754	case PR_SET_FPEMU:
				1755	error = SET_FPEMU_CTL(current, arg2);
				1756	break;
				1757	case PR_GET_FPEMU:
				1758	error = GET_FPEMU_CTL(current, arg2);
				1759	break;
				1760	case PR_SET_FPEXC:
				1761	error = SET_FPEXC_CTL(current, arg2);
				1762	break;
				1763	case PR_GET_FPEXC:
				1764	error = GET_FPEXC_CTL(current, arg2);
				1765	break;
				1766	case PR_GET_TIMING:
				1767	error = PR_TIMING_STATISTICAL;
				1768	break;
				1769	case PR_SET_TIMING:
				1770	if (arg2 == PR_TIMING_STATISTICAL)
				1771	error = 0;
				1772	else
				1773	error = -EINVAL;
				1774	break;
				1775
				1776	case PR_GET_KEEPCAPS:
				1777	if (current->keep_capabilities)
				1778	error = 1;
				1779	break;
				1780	case PR_SET_KEEPCAPS:
				1781	if (arg2 != 0 && arg2 != 1) {
				1782	error = -EINVAL;
				1783	break;
				1784	}
				1785	current->keep_capabilities = arg2;
				1786	break;
				1787	case PR_SET_NAME: {
				1788	struct task_struct *me = current;
				1789	unsigned char ncomm[sizeof(me->comm)];
				1790
				1791	ncomm[sizeof(me->comm)-1] = 0;
				1792	if (strncpy_from_user(ncomm, (char __user *)arg2,
				1793	sizeof(me->comm)-1) < 0)
				1794	return -EFAULT;
				1795	set_task_comm(me, ncomm);
				1796	return 0;
				1797	}
				1798	case PR_GET_NAME: {
				1799	struct task_struct *me = current;
				1800	unsigned char tcomm[sizeof(me->comm)];
				1801
				1802	get_task_comm(tcomm, me);
				1803	if (copy_to_user((char __user *)arg2, tcomm, sizeof(tcomm)))
				1804	return -EFAULT;
				1805	return 0;
				1806	}
				1807	default:
				1808	error = -EINVAL;
				1809	break;
				1810	}
				1811	return error;
				1812	}