exec: pin stack limit during exec

Since the stack rlimit is used in multiple places during exec and it can
be changed via other threads (via setrlimit()) or processes (via
prlimit()), the assumption that the value doesn't change cannot be made.
This leads to races with mm layout selection and argument size
calculations.  This changes the exec path to use the rlimit stored in
bprm instead of in current.  Before starting the thread, the bprm stack
rlimit is stored back to current.

Link: http://lkml.kernel.org/r/1518638796-20819-4-git-send-email-keescook@chromium.org
Fixes: 64701dee4178e ("exec: Use sane stack rlimit under secureexec")
Signed-off-by: Kees Cook <keescook@chromium.org>
Reported-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Reported-by: Andy Lutomirski <luto@kernel.org>
Reported-by: Brad Spengler <spender@grsecurity.net>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Ben Hutchings <ben@decadent.org.uk>
Cc: Greg KH <greg@kroah.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: "Jason A. Donenfeld" <Jason@zx2c4.com>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/fs/exec.c b/fs/exec.c
index 422ad79..183059c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -257,7 +257,7 @@
 		 *    to work from.
 		 */
 		limit = _STK_LIM / 4 * 3;
-		limit = min(limit, rlimit(RLIMIT_STACK) / 4);
+		limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
 		if (size > limit)
 			goto fail;
 	}
@@ -411,6 +411,11 @@
 	if (!mm)
 		goto err;
 
+	/* Save current stack limit for all calculations made during exec. */
+	task_lock(current->group_leader);
+	bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK];
+	task_unlock(current->group_leader);
+
 	err = __bprm_mm_init(bprm);
 	if (err)
 		goto err;
@@ -697,7 +702,7 @@
 
 #ifdef CONFIG_STACK_GROWSUP
 	/* Limit stack size */
-	stack_base = rlimit_max(RLIMIT_STACK);
+	stack_base = bprm->rlim_stack.rlim_max;
 	if (stack_base > STACK_SIZE_MAX)
 		stack_base = STACK_SIZE_MAX;
 
@@ -770,7 +775,7 @@
 	 * Align this down to a page boundary as expand_stack
 	 * will align it up.
 	 */
-	rlim_stack = rlimit(RLIMIT_STACK) & PAGE_MASK;
+	rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK;
 #ifdef CONFIG_STACK_GROWSUP
 	if (stack_size + stack_expand > rlim_stack)
 		stack_base = vma->vm_start + rlim_stack;
@@ -1323,8 +1328,6 @@
 
 void setup_new_exec(struct linux_binprm * bprm)
 {
-	struct rlimit rlim_stack;
-
 	/*
 	 * Once here, prepare_binrpm() will not be called any more, so
 	 * the final state of setuid/setgid/fscaps can be merged into the
@@ -1343,15 +1346,11 @@
 		 * RLIMIT_STACK, but after the point of no return to avoid
 		 * needing to clean up the change on failure.
 		 */
-		if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM)
-			current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM;
+		if (bprm->rlim_stack.rlim_cur > _STK_LIM)
+			bprm->rlim_stack.rlim_cur = _STK_LIM;
 	}
 
-	task_lock(current->group_leader);
-	rlim_stack = current->signal->rlim[RLIMIT_STACK];
-	task_unlock(current->group_leader);
-
-	arch_pick_mmap_layout(current->mm, &rlim_stack);
+	arch_pick_mmap_layout(current->mm, &bprm->rlim_stack);
 
 	current->sas_ss_sp = current->sas_ss_size = 0;
 
@@ -1387,6 +1386,10 @@
 /* Runs immediately before start_thread() takes over. */
 void finalize_exec(struct linux_binprm *bprm)
 {
+	/* Store any stack rlimit changes before starting thread. */
+	task_lock(current->group_leader);
+	current->signal->rlim[RLIMIT_STACK] = bprm->rlim_stack;
+	task_unlock(current->group_leader);
 }
 EXPORT_SYMBOL(finalize_exec);