Blame - mm/oom_kill.c - kernel/msm-4.9

blob: 1e56076672f5870e9766753270d77442e5743ac9 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* linux/mm/oom_kill.c
				3	*
				4	* Copyright (C) 1998,2000 Rik van Riel
				5	* Thanks go out to Claus Fischer for some serious inspiration and
				6	* for goading me into coding this file...
				7	*
				8	* The routines in this file are used to kill a process when
				9	* we're seriously out of memory. This gets called from kswapd()
				10	* in linux/mm/vmscan.c when we really run out of memory.
				11	*
				12	* Since we won't call these routines often (on a well-configured
				13	* machine) this file will double as a 'coding guide' and a signpost
				14	* for newbie kernel hackers. It features several pointers to major
				15	* kernel subsystems and hints as to where to find out what things do.
				16	*/
				17
				18	#include <linux/mm.h>
				19	#include <linux/sched.h>
				20	#include <linux/swap.h>
				21	#include <linux/timex.h>
				22	#include <linux/jiffies.h>
				23
				24	/* #define DEBUG */
				25
				26	/**
				27	* badness - calculate a numeric value for how bad this task has been
				28	* @p: task struct of which task we should calculate
				29	* @uptime: current uptime in seconds
				30	*
				31	* Returns the badness score, or 0 for a task that has no mm.
				32	* The formula is documented inline in the function; the rationale is
				33	* that we want to select a good task to kill when we run out of memory.
				34	*
				35	* Good in this context means that:
				36	* 1) we lose the minimum amount of work done
				37	* 2) we recover a large amount of memory
				38	* 3) we don't kill anything innocent of eating tons of memory
				39	* 4) we want to kill the minimum amount of processes (one)
				40	* 5) we try to kill the process the user expects us to kill, this
				41	* algorithm has been meticulously tuned to meet the principle
				42	* of least surprise ... (be careful when you change it)
				43	*/
				44
				45	unsigned long badness(struct task_struct *p, unsigned long uptime)
				46	{
				47	unsigned long points, cpu_time, run_time, s;
				48	struct list_head *tsk;
				49
				50	if (!p->mm)
				51	return 0;
				52
				53	/*
				54	* The memory size of the process is the basis for the badness.
				55	*/
				56	points = p->mm->total_vm;
				57
				58	/*
				59	* Processes which fork a lot of child processes are likely
				60	* a good choice. We add the vmsize of the children if they
				61	* have their own mm. This prevents forking servers from flooding
				62	* the machine with an endless number of children.
				63	*/
				64	list_for_each(tsk, &p->children) {
				65	struct task_struct *chld;
				66	chld = list_entry(tsk, struct task_struct, sibling);
				67	if (chld->mm != p->mm && chld->mm)
				68	points += chld->mm->total_vm;
				69	}
				70
				71	/*
				72	* CPU time is in tens of seconds and run time is in thousands
				73	* of seconds. There is no particular reason for this other than
				74	* that it turned out to work very well in practice.
				75	*/
				76	cpu_time = (cputime_to_jiffies(p->utime) + cputime_to_jiffies(p->stime))
				77	>> (SHIFT_HZ + 3);
				78
				79	if (uptime >= p->start_time.tv_sec)
				80	run_time = (uptime - p->start_time.tv_sec) >> 10;
				81	else
				82	run_time = 0;
				83
				84	s = int_sqrt(cpu_time);
				85	if (s)
				86	points /= s;
				87	s = int_sqrt(int_sqrt(run_time));
				88	if (s)
				89	points /= s;
				90
				91	/*
				92	* Niced processes are most likely less important, so double
				93	* their badness points.
				94	*/
				95	if (task_nice(p) > 0)
				96	points *= 2;
				97
				98	/*
				99	* Superuser processes are usually more important, so we make it
				100	* less likely that we kill those.
				101	*/
				102	if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_ADMIN) ||
				103	p->uid == 0 || p->euid == 0)
				104	points /= 4;
				105
				106	/*
				107	* We don't want to kill a process with direct hardware access.
				108	* Not only could that mess up the hardware, but usually users
				109	* tend to only have this flag set on applications they think
				110	* of as important.
				111	*/
				112	if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO))
				113	points /= 4;
				114
				115	/*
				116	* Adjust the score by oomkilladj.
				117	*/
				118	if (p->oomkilladj) {
				119	if (p->oomkilladj > 0)
				120	points <<= p->oomkilladj;
				121	else
				122	points >>= -(p->oomkilladj);
				123	}
				124
				125	#ifdef DEBUG
				126	printk(KERN_DEBUG "OOMkill: task %d (%s) got %lu points\n",
				127	p->pid, p->comm, points);
				128	#endif
				129	return points;
				130	}
				131
				132	/*
				133	* Simple selection loop. We choose the process with the highest
				134	* number of 'points'. We expect the caller will lock the tasklist.
				135	* Returns NULL if nothing qualifies, ERR_PTR(-1UL) if a task is exiting.
				136	* (not docbooked, we don't want this one cluttering up the manual)
				137	*/
				138	static struct task_struct * select_bad_process(void)
				139	{
				140	unsigned long maxpoints = 0;
				141	struct task_struct *g, *p;
				142	struct task_struct *chosen = NULL;
				143	struct timespec uptime;
				144
				145	do_posix_clock_monotonic_gettime(&uptime);
				146	do_each_thread(g, p)
				147	/* skip the init task with pid == 1 */
Andrea Arcangeli	79befd0	2005-04-16 15:24:05 -0700	[diff] [blame]	148	if (p->pid > 1 && p->oomkilladj != OOM_DISABLE) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	149	unsigned long points;
				150
				151	/*
				152	* This is in the process of releasing memory so wait for it
				153	* to finish before killing some other task by mistake.
				154	*/
				155	if ((unlikely(test_tsk_thread_flag(p, TIF_MEMDIE)) || (p->flags & PF_EXITING)) &&
				156	!(p->flags & PF_DEAD))
				157	return ERR_PTR(-1UL);
				158	if (p->flags & PF_SWAPOFF)
				159	return p;	/* a PF_SWAPOFF task is picked at once, unscored */
				160
				161	points = badness(p, uptime.tv_sec);
				162	if (points > maxpoints || !chosen) {
				163	chosen = p;
				164	maxpoints = points;
				165	}
				166	}
				167	while_each_thread(g, p);
				168	return chosen;
				169	}
				170
				171	/*
				172	* Kill @p with SIGKILL, refusing init (pid 1) and mm-less tasks.
				173	* NOTE(review): this comment used to promise SIGTERM for tasks with
				174	* CAP_SYS_RAWIO, but the code below always sends SIGKILL.
				175	*/
				176	static void __oom_kill_task(task_t *p)
				177	{
				178	if (p->pid == 1) {
				179	WARN_ON(1);
				180	printk(KERN_WARNING "tried to kill init!\n");
				181	return;
				182	}
				183
				184	task_lock(p);
				185	if (!p->mm || p->mm == &init_mm) {
				186	WARN_ON(1);
				187	printk(KERN_WARNING "tried to kill an mm-less task!\n");
				188	task_unlock(p);
				189	return;
				190	}
				191	task_unlock(p);
				192	printk(KERN_ERR "Out of Memory: Killed process %d (%s).\n", p->pid, p->comm);
				193
				194	/*
				195	* We give our sacrificial lamb high priority and access to
				196	* all the memory it needs. That way it should be able to
				197	* exit() and clear out its resources quickly...
				198	*/
				199	p->time_slice = HZ;
				200	set_tsk_thread_flag(p, TIF_MEMDIE);
				201
				202	force_sig(SIGKILL, p);
				203	}
				204
				205	static struct mm_struct *oom_kill_task(task_t *p)
				206	{
				207	struct mm_struct *mm = get_task_mm(p);
				208	task_t *g, *q;
				209
				210	if (!mm)
				211	return NULL;	/* nothing killed; the caller tries another victim */
				212	if (mm == &init_mm) {
				213	mmput(mm);
				214	return NULL;	/* never kill a task running on init_mm */
				215	}
				216
				217	__oom_kill_task(p);
				218	/*
				219	* Also kill every task that shares this ->mm but lives in a
				220	* different thread group; tasks with p's tgid are skipped here.
				221	*/
				222	do_each_thread(g, q)
				223	if (q->mm == mm && q->tgid != p->tgid)
				224	__oom_kill_task(q);
				225	while_each_thread(g, q);
				226
				227	return mm;	/* out_of_memory() calls mmput() on this */
				228	}
				229
				230	static struct mm_struct *oom_kill_process(struct task_struct *p)
				231	{
				232	struct mm_struct *mm;
				233	struct task_struct *c;
				234	struct list_head *tsk;
				235
				236	/* Try to kill a child that has its own mm first */
				237	list_for_each(tsk, &p->children) {
				238	c = list_entry(tsk, struct task_struct, sibling);
				239	if (c->mm == p->mm)
				240	continue;
				241	mm = oom_kill_task(c);
				242	if (mm)
				243	return mm;	/* one victim is enough */
				244	}
				245	return oom_kill_task(p);	/* no child was killed: kill p itself */
				246	}
				247
				248	/**
				249	* out_of_memory - kill the "best" process when we run out of memory
				250	* @gfp_mask: gfp mask; only reported in the log message here
				251	* @order: allocation order; only reported in the log message here
				252	*
				253	* Rather than kill a random task (bad) or let the system crash (worse),
				254	* try to be smart about which process to kill; good is good enough.
				255	*/
Marcelo Tosatti	79b9ce3	2005-07-07 17:56:04 -0700	[diff] [blame]	256	void out_of_memory(unsigned int __nocast gfp_mask, int order)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	257	{
				258	struct mm_struct *mm = NULL;
				259	task_t * p;
				260
Anton Blanchard	4263926	2005-07-07 17:56:06 -0700	[diff] [blame]	261	if (printk_ratelimit()) {
				262	printk("oom-killer: gfp_mask=0x%x, order=%d\n",
				263	gfp_mask, order);
				264	show_mem();
				265	}
Janet Morgan	578c2fd	2005-06-21 17:14:56 -0700	[diff] [blame]	266
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	267	read_lock(&tasklist_lock);
				268	retry:
				269	p = select_bad_process();
				270
				271	if (PTR_ERR(p) == -1UL)
				272	goto out;	/* a task is already exiting; kill nothing, just wait */
				273
				274	/* Found nothing?!?! Either we hang forever, or we panic. */
				275	if (!p) {
				276	read_unlock(&tasklist_lock);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	277	panic("Out of memory and no killable processes...\n");
				278	}
				279
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	280	mm = oom_kill_process(p);
				281	if (!mm)
				282	goto retry;	/* nothing was killed; select again */
				283
				284	out:
				285	read_unlock(&tasklist_lock);
				286	if (mm)
				287	mmput(mm);	/* drop the mm returned by oom_kill_process() */
				288
				289	/*
				290	* Give "p" a good chance of killing itself before we
				291	* retry to allocate memory.
				292	*/
				293	__set_current_state(TASK_INTERRUPTIBLE);
				294	schedule_timeout(1);
				295	}