mm: oom_kill: reap memory of a task that receives SIGKILL

Free the pages parallely for a task that receives SIGKILL using the
oom_reaper. This freeing of pages will help to give the pages to buddy
system well advance.
This reaps for the process which received SIGKILL through
either sys_kill from user or kill_pid from kernel and that sending
process has CAP_KILL capability.
Also sysctl interface, reap_mem_on_sigkill, is added to turn on/off this
feature.

Change-Id: I21adb95de5e380a80d7eb0b87d9b5b553f52e28a
Signed-off-by: Charan Teja Reddy <charante@codeaurora.org>
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 3cab335..1ac0123 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -50,6 +50,7 @@
 - nr_trim_pages         (only if CONFIG_MMU=n)
 - numa_zonelist_order
 - oom_dump_tasks
+- reap_mem_on_sigkill
 - oom_kill_allocating_task
 - overcommit_kbytes
 - overcommit_memory
@@ -635,6 +636,24 @@
 
 ==============================================================
 
+reap_mem_on_sigkill
+
+This enables or disables the memory reaping for a SIGKILL received
+process and that the sending process must have the CAP_KILL capabilities.
+
+If this is set to 1, when a process receives SIGKILL from a process
+that has the capability, CAP_KILL, the process is added into the oom_reaper
+queue which can be picked up by the oom_reaper thread to reap the memory of
+that process. This reaps for the process which received SIGKILL through
+either sys_kill from user or kill_pid from kernel.
+
+If this is set to 0, we are not reaping memory of a SIGKILL, sent through
+either sys_kill from user or kill_pid from kernel, received process.
+
+The default value is 0 (disabled).
+
+==============================================================
+
 oom_kill_allocating_task
 
 This enables or disables killing the OOM-triggering task in
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 34ed577..e69445a 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -97,4 +97,8 @@
 extern int sysctl_oom_dump_tasks;
 extern int sysctl_oom_kill_allocating_task;
 extern int sysctl_panic_on_oom;
+extern int sysctl_reap_mem_on_sigkill;
+
+/* calls for LMK reaper */
+extern void add_to_oom_reaper(struct task_struct *p);
 #endif /* _INCLUDE_LINUX_OOM_H */
diff --git a/kernel/signal.c b/kernel/signal.c
index 4364e57..23af00f 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -34,6 +34,8 @@
 #include <linux/compat.h>
 #include <linux/cn_proc.h>
 #include <linux/compiler.h>
+#include <linux/oom.h>
+#include <linux/capability.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/signal.h>
@@ -1269,8 +1271,11 @@
 	ret = check_kill_permission(sig, info, p);
 	rcu_read_unlock();
 
-	if (!ret && sig)
+	if (!ret && sig) {
 		ret = do_send_sig_info(sig, info, p, true);
+		if (capable(CAP_KILL) && sig == SIGKILL)
+			add_to_oom_reaper(p);
+	}
 
 	return ret;
 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1d5e480..6efdb67 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1394,6 +1394,13 @@
 		.proc_handler	= proc_dointvec,
 	},
 	{
+		.procname       = "reap_mem_on_sigkill",
+		.data           = &sysctl_reap_mem_on_sigkill,
+		.maxlen         = sizeof(sysctl_reap_mem_on_sigkill),
+		.mode           = 0644,
+		.proc_handler   = proc_dointvec,
+	},
+	{
 		.procname	= "overcommit_ratio",
 		.data		= &sysctl_overcommit_ratio,
 		.maxlen		= sizeof(sysctl_overcommit_ratio),
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index f99065f..a7fd1c6 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -49,6 +49,7 @@
 int sysctl_panic_on_oom;
 int sysctl_oom_kill_allocating_task;
 int sysctl_oom_dump_tasks = 1;
+int sysctl_reap_mem_on_sigkill;
 
 DEFINE_MUTEX(oom_lock);
 
@@ -614,10 +615,13 @@
 	if (!oom_reaper_th)
 		return;
 
-	/* move the lock here to avoid scenario of queuing
-	 * the same task by both OOM killer and LMK.
+	/*
+	 * Move the lock here to avoid scenario of queuing
+	 * the same task by both OOM killer and any other SIGKILL
+	 * path.
 	 */
 	spin_lock(&oom_reaper_lock);
+
 	/* tsk is already queued? */
 	if (tsk == oom_reaper_list || tsk->oom_reaper_list) {
 		spin_unlock(&oom_reaper_lock);
@@ -650,6 +654,16 @@
 }
 #endif /* CONFIG_MMU */
 
+static void __mark_oom_victim(struct task_struct *tsk)
+{
+	struct mm_struct *mm = tsk->mm;
+
+	if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) {
+		atomic_inc(&tsk->signal->oom_mm->mm_count);
+		set_bit(MMF_OOM_VICTIM, &mm->flags);
+	}
+}
+
 /**
  * mark_oom_victim - mark the given task as OOM victim
  * @tsk: task to mark
@@ -662,18 +676,13 @@
  */
 static void mark_oom_victim(struct task_struct *tsk)
 {
-	struct mm_struct *mm = tsk->mm;
-
 	WARN_ON(oom_killer_disabled);
 	/* OOM killer might race with memcg OOM */
 	if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE))
 		return;
 
 	/* oom_mm is bound to the signal struct life time. */
-	if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) {
-		atomic_inc(&tsk->signal->oom_mm->mm_count);
-		set_bit(MMF_OOM_VICTIM, &mm->flags);
-	}
+	__mark_oom_victim(tsk);
 
 	/*
 	 * Make sure that the task is woken up from uninterruptible sleep
@@ -1089,3 +1098,22 @@
 	out_of_memory(&oc);
 	mutex_unlock(&oom_lock);
 }
+
+void add_to_oom_reaper(struct task_struct *p)
+	__releases(p->alloc_lock)
+{
+	if (!sysctl_reap_mem_on_sigkill)
+		return;
+
+	p = find_lock_task_mm(p);
+	if (!p)
+		return;
+
+	get_task_struct(p);
+	if (task_will_free_mem(p)) {
+		__mark_oom_victim(p);
+		wake_oom_reaper(p);
+	}
+	task_unlock(p);
+	put_task_struct(p);
+}