Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/signal

Pull generic execve() changes from Al Viro:
 "This introduces the generic kernel_thread() and kernel_execve()
  functions, and switches x86, arm, alpha, um and s390 over to them."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/signal: (26 commits)
  s390: convert to generic kernel_execve()
  s390: switch to generic kernel_thread()
  s390: fold kernel_thread_helper() into ret_from_fork()
  s390: fold execve_tail() into start_thread(), convert to generic sys_execve()
  um: switch to generic kernel_thread()
  x86, um/x86: switch to generic sys_execve and kernel_execve
  x86: split ret_from_fork
  alpha: introduce ret_from_kernel_execve(), switch to generic kernel_execve()
  alpha: switch to generic kernel_thread()
  alpha: switch to generic sys_execve()
  arm: get rid of execve wrapper, switch to generic execve() implementation
  arm: optimized current_pt_regs()
  arm: introduce ret_from_kernel_execve(), switch to generic kernel_execve()
  arm: split ret_from_fork, simplify kernel_thread() [based on patch by rmk]
  generic sys_execve()
  generic kernel_execve()
  new helper: current_pt_regs()
  preparation for generic kernel_thread()
  um: kill thread->forking
  um: let signal_delivered() do SIGTRAP on singlestepping into handler
  ...
diff --git a/arch/Kconfig b/arch/Kconfig
index 550cce4..26a2841 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -271,6 +271,9 @@
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
 	bool
 
+config GENERIC_KERNEL_THREAD
+	bool
+
 config HAVE_ARCH_SECCOMP_FILTER
 	bool
 	help
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 9944ded..7da9124 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -20,6 +20,7 @@
 	select GENERIC_CMOS_UPDATE
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select GENERIC_KERNEL_THREAD
 	help
 	  The Alpha is a 64-bit general-purpose processor designed and
 	  marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index d97d663..64ffc9e 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -10,3 +10,4 @@
 header-y += reg.h
 header-y += regdef.h
 header-y += sysinfo.h
+generic-y += exec.h
diff --git a/arch/alpha/include/asm/exec.h b/arch/alpha/include/asm/exec.h
deleted file mode 100644
index 4a5a41f..0000000
--- a/arch/alpha/include/asm/exec.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ALPHA_EXEC_H
-#define __ALPHA_EXEC_H
-
-#define arch_align_stack(x) (x)
-
-#endif /* __ALPHA_EXEC_H */
diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h
index e37b887..6cb7fe8 100644
--- a/arch/alpha/include/asm/processor.h
+++ b/arch/alpha/include/asm/processor.h
@@ -49,9 +49,6 @@
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
-/* Create a kernel thread without removing it from tasklists.  */
-extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc)
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index a31a78e..3cb6c11 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -481,6 +481,8 @@
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
+#define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_KERNEL_EXECVE
 
 /* "Conditional" syscalls.  What we want is
 
diff --git a/arch/alpha/kernel/alpha_ksyms.c b/arch/alpha/kernel/alpha_ksyms.c
index 15fa821..89566b3 100644
--- a/arch/alpha/kernel/alpha_ksyms.c
+++ b/arch/alpha/kernel/alpha_ksyms.c
@@ -50,9 +50,6 @@
 EXPORT_SYMBOL(alpha_write_fp_reg);
 EXPORT_SYMBOL(alpha_write_fp_reg_s);
 
-/* entry.S */
-EXPORT_SYMBOL(kernel_thread);
-
 /* Networking helper routines. */
 EXPORT_SYMBOL(csum_tcpudp_magic);
 EXPORT_SYMBOL(ip_compute_csum);
diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S
index ec0da05..7e43e11 100644
--- a/arch/alpha/kernel/entry.S
+++ b/arch/alpha/kernel/entry.S
@@ -609,59 +609,35 @@
 .end ret_from_fork
 
 /*
- * kernel_thread(fn, arg, clone_flags)
+ * ... and new kernel threads - here
  */
 	.align 4
-	.globl	kernel_thread
-	.ent	kernel_thread
-kernel_thread:
-	/* We can be called from a module.  */
-	ldgp	$gp, 0($27)
-	.prologue 1
-	subq	$sp, SP_OFF+6*8, $sp
-	br	$1, 2f		/* load start address */
-
-	/* We've now "returned" from a fake system call.  */
-	unop
-	blt	$0, 1f		/* error?  */
-	ldi	$1, 0x3fff
-	beq	$20, 1f		/* parent or child?  */
-
-	bic	$sp, $1, $8	/* in child.  */
-	jsr	$26, ($27)
+	.globl	ret_from_kernel_thread
+	.ent	ret_from_kernel_thread
+ret_from_kernel_thread:
+	mov	$17, $16
+	jsr	$26, schedule_tail
+	mov	$9, $27
+	mov	$10, $16
+	jsr	$26, ($9)
 	ldgp	$gp, 0($26)
 	mov	$0, $16
 	mov	$31, $26
 	jmp	$31, sys_exit
+.end ret_from_kernel_thread
 
-1:	ret			/* in parent.  */
-
-	.align 4
-2:	/* Fake a system call stack frame, as we can't do system calls
-	   from kernel space.  Note that we store FN and ARG as they
-	   need to be set up in the child for the call.  Also store $8
-	   and $26 for use in the parent.  */
-	stq	$31, SP_OFF($sp)	/* ps */
-	stq	$1, SP_OFF+8($sp)	/* pc */
-	stq	$gp, SP_OFF+16($sp)	/* gp */
-	stq	$16, 136($sp)		/* $27; FN for child */
-	stq	$17, SP_OFF+24($sp)	/* $16; ARG for child */
-	stq	$8, 64($sp)		/* $8 */
-	stq	$26, 128($sp)		/* $26 */
+	.globl	ret_from_kernel_execve
+	.align	4
+	.ent	ret_from_kernel_execve
+ret_from_kernel_execve:
+	mov	$16, $sp
 	/* Avoid the HAE being gratuitously wrong, to avoid restoring it.  */
 	ldq	$2, alpha_mv+HAE_CACHE
 	stq	$2, 152($sp)		/* HAE */
+	mov	$31, $19		/* to disable syscall restarts */
+	br	$31, ret_to_user
 
-	/* Shuffle FLAGS to the front; add CLONE_VM.  */
-	ldi	$1, CLONE_VM|CLONE_UNTRACED
-	or	$18, $1, $16
-	bsr	$26, sys_clone
-
-	/* We don't actually care for a3 success widgetry in the kernel.
-	   Not for positive errno values.  */
-	stq	$0, 0($sp)		/* $0 */
-	br	ret_to_kernel
-.end kernel_thread
+.end	ret_from_kernel_execve
 
 
 /*
@@ -745,15 +721,6 @@
 .end sys_rt_sigreturn
 
 	.align	4
-	.globl	sys_execve
-	.ent	sys_execve
-sys_execve:
-	.prologue 0
-	mov	$sp, $19
-	jmp	$31, do_sys_execve
-.end sys_execve
-
-	.align	4
 	.globl	alpha_ni_syscall
 	.ent	alpha_ni_syscall
 alpha_ni_syscall:
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 83638aa..4054e0f 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -263,33 +263,35 @@
 
 /*
  * Copy an alpha thread..
- *
- * Note the "stack_offset" stuff: when returning to kernel mode, we need
- * to have some extra stack-space for the kernel stack that still exists
- * after the "ret_from_fork".  When returning to user mode, we only want
- * the space needed by the syscall stack frame (ie "struct pt_regs").
- * Use the passed "regs" pointer to determine how much space we need
- * for a kernel fork().
  */
 
 int
 copy_thread(unsigned long clone_flags, unsigned long usp,
-	    unsigned long unused,
+	    unsigned long arg,
 	    struct task_struct * p, struct pt_regs * regs)
 {
 	extern void ret_from_fork(void);
+	extern void ret_from_kernel_thread(void);
 
 	struct thread_info *childti = task_thread_info(p);
-	struct pt_regs * childregs;
-	struct switch_stack * childstack, *stack;
-	unsigned long stack_offset, settls;
+	struct pt_regs *childregs = task_pt_regs(p);
+	struct switch_stack *childstack, *stack;
+	unsigned long settls;
 
-	stack_offset = PAGE_SIZE - sizeof(struct pt_regs);
-	if (!(regs->ps & 8))
-		stack_offset = (PAGE_SIZE-1) & (unsigned long) regs;
-	childregs = (struct pt_regs *)
-	  (stack_offset + PAGE_SIZE + task_stack_page(p));
-		
+	childstack = ((struct switch_stack *) childregs) - 1;
+	if (unlikely(!regs)) {
+		/* kernel thread */
+		memset(childstack, 0,
+			sizeof(struct switch_stack) + sizeof(struct pt_regs));
+		childstack->r26 = (unsigned long) ret_from_kernel_thread;
+		childstack->r9 = usp;	/* function */
+		childstack->r10 = arg;
+		childregs->hae = alpha_mv.hae_cache,
+		childti->pcb.usp = 0;
+		childti->pcb.ksp = (unsigned long) childstack;
+		childti->pcb.flags = 1;	/* set FEN, clear everything else */
+		return 0;
+	}
 	*childregs = *regs;
 	settls = regs->r20;
 	childregs->r0 = 0;
@@ -297,7 +299,6 @@
 	childregs->r20 = 1;	/* OSF/1 has some strange fork() semantics.  */
 	regs->r20 = 0;
 	stack = ((struct switch_stack *) regs) - 1;
-	childstack = ((struct switch_stack *) childregs) - 1;
 	*childstack = *stack;
 	childstack->r26 = (unsigned long) ret_from_fork;
 	childti->pcb.usp = usp;
@@ -386,27 +387,6 @@
 EXPORT_SYMBOL(dump_elf_task_fp);
 
 /*
- * sys_execve() executes a new program.
- */
-asmlinkage int
-do_sys_execve(const char __user *ufilename,
-	      const char __user *const __user *argv,
-	      const char __user *const __user *envp, struct pt_regs *regs)
-{
-	int error;
-	char *filename;
-
-	filename = getname(ufilename);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-	error = do_execve(filename, argv, envp, regs);
-	putname(filename);
-out:
-	return error;
-}
-
-/*
  * Return saved PC of a blocked thread.  This assumes the frame
  * pointer is the 6th saved long on the kernel stack and that the
  * saved return address is the first long in the frame.  This all
@@ -459,22 +439,3 @@
 	}
 	return pc;
 }
-
-int kernel_execve(const char *path, const char *const argv[], const char *const envp[])
-{
-	/* Avoid the HAE being gratuitously wrong, which would cause us
-	   to do the whole turn off interrupts thing and restore it.  */
-	struct pt_regs regs = {.hae = alpha_mv.hae_cache};
-	int err = do_execve(path, argv, envp, &regs);
-	if (!err) {
-		struct pt_regs *p = current_pt_regs();
-		/* copy regs to normal position and off to userland we go... */
-		*p = regs;
-		__asm__ __volatile__ (
-			"mov	%0, $sp;"
-			"br	$31, ret_from_sys_call"
-			: : "r"(p));
-	}
-	return err;
-}
-EXPORT_SYMBOL(kernel_execve);
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 2867a77..e40eefb 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -52,6 +52,7 @@
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select DCACHE_WORD_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && !CPU_BIG_ENDIAN
+	select GENERIC_KERNEL_THREAD
 	help
 	  The ARM series is a line of low-power-consumption RISC chip designs
 	  licensed by ARM Ltd and targeted at embedded applications and
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index 99afa74..06e7d50 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -85,11 +85,6 @@
 #define cpu_relax()			barrier()
 #endif
 
-/*
- * Create a new kernel thread
- */
-extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-
 #define task_pt_regs(p) \
 	((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
 
diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
index 355ece5..44fe998 100644
--- a/arch/arm/include/asm/ptrace.h
+++ b/arch/arm/include/asm/ptrace.h
@@ -254,6 +254,11 @@
 	return regs->ARM_sp;
 }
 
+#define current_pt_regs(void) ({				\
+	register unsigned long sp asm ("sp");			\
+	(struct pt_regs *)((sp | (THREAD_SIZE - 1)) - 7) - 1;	\
+})
+
 #endif /* __KERNEL__ */
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 74542c5..368165e 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -2,7 +2,6 @@
 #include <asm/barrier.h>
 #include <asm/compiler.h>
 #include <asm/cmpxchg.h>
-#include <asm/exec.h>
 #include <asm/switch_to.h>
 #include <asm/system_info.h>
 #include <asm/system_misc.h>
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index d9ff5cc..f259921 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -478,6 +478,8 @@
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_SYS_SOCKETCALL
 #endif
+#define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_KERNEL_EXECVE
 
 /*
  * "Conditional" syscalls
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index e337879..831cd38 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -20,7 +20,7 @@
 		CALL(sys_creat)
 		CALL(sys_link)
 /* 10 */	CALL(sys_unlink)
-		CALL(sys_execve_wrapper)
+		CALL(sys_execve)
 		CALL(sys_chdir)
 		CALL(OBSOLETE(sys_time))	/* used by libc4 */
 		CALL(sys_mknod)
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index f459870..e340fa1 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -91,6 +91,30 @@
 	b	ret_slow_syscall
 ENDPROC(ret_from_fork)
 
+ENTRY(ret_from_kernel_thread)
+ UNWIND(.fnstart)
+ UNWIND(.cantunwind)
+	bl	schedule_tail
+	mov	r0, r4
+	adr	lr, BSYM(1f)	@ kernel threads should not exit
+	mov	pc, r5
+1:	bl	do_exit
+	nop
+ UNWIND(.fnend)
+ENDPROC(ret_from_kernel_thread)
+
+/*
+ * turn a kernel thread into userland process
+ * use: ret_from_kernel_execve(struct pt_regs *normal)
+ */
+ENTRY(ret_from_kernel_execve)
+	mov	why, #0			@ not a syscall
+	str	why, [r0, #S_R0]	@ ... and we want 0 in ->ARM_r0 as well
+	get_thread_info tsk		@ thread structure
+	mov	sp, r0			@ stack pointer just under pt_regs
+	b	ret_slow_syscall
+ENDPROC(ret_from_kernel_execve)
+
 	.equ NR_syscalls,0
 #define CALL(x) .equ NR_syscalls,NR_syscalls+1
 #include "calls.S"
@@ -517,11 +541,6 @@
 		b	sys_vfork
 ENDPROC(sys_vfork_wrapper)
 
-sys_execve_wrapper:
-		add	r3, sp, #S_OFF
-		b	sys_execve
-ENDPROC(sys_execve_wrapper)
-
 sys_clone_wrapper:
 		add	ip, sp, #S_OFF
 		str	ip, [sp, #4]
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 04eea22..f98c17f 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -373,6 +373,7 @@
 }
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
 
 int
 copy_thread(unsigned long clone_flags, unsigned long stack_start,
@@ -381,13 +382,20 @@
 	struct thread_info *thread = task_thread_info(p);
 	struct pt_regs *childregs = task_pt_regs(p);
 
-	*childregs = *regs;
-	childregs->ARM_r0 = 0;
-	childregs->ARM_sp = stack_start;
-
 	memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save));
+
+	if (likely(regs)) {
+		*childregs = *regs;
+		childregs->ARM_r0 = 0;
+		childregs->ARM_sp = stack_start;
+		thread->cpu_context.pc = (unsigned long)ret_from_fork;
+	} else {
+		thread->cpu_context.r4 = stk_sz;
+		thread->cpu_context.r5 = stack_start;
+		thread->cpu_context.pc = (unsigned long)ret_from_kernel_thread;
+		childregs->ARM_cpsr = SVC_MODE;
+	}
 	thread->cpu_context.sp = (unsigned long)childregs;
-	thread->cpu_context.pc = (unsigned long)ret_from_fork;
 
 	clear_ptrace_hw_breakpoint(p);
 
@@ -423,63 +431,6 @@
 }
 EXPORT_SYMBOL(dump_fpu);
 
-/*
- * Shuffle the argument into the correct register before calling the
- * thread function.  r4 is the thread argument, r5 is the pointer to
- * the thread function, and r6 points to the exit function.
- */
-extern void kernel_thread_helper(void);
-asm(	".pushsection .text\n"
-"	.align\n"
-"	.type	kernel_thread_helper, #function\n"
-"kernel_thread_helper:\n"
-#ifdef CONFIG_TRACE_IRQFLAGS
-"	bl	trace_hardirqs_on\n"
-#endif
-"	msr	cpsr_c, r7\n"
-"	mov	r0, r4\n"
-"	mov	lr, r6\n"
-"	mov	pc, r5\n"
-"	.size	kernel_thread_helper, . - kernel_thread_helper\n"
-"	.popsection");
-
-#ifdef CONFIG_ARM_UNWIND
-extern void kernel_thread_exit(long code);
-asm(	".pushsection .text\n"
-"	.align\n"
-"	.type	kernel_thread_exit, #function\n"
-"kernel_thread_exit:\n"
-"	.fnstart\n"
-"	.cantunwind\n"
-"	bl	do_exit\n"
-"	nop\n"
-"	.fnend\n"
-"	.size	kernel_thread_exit, . - kernel_thread_exit\n"
-"	.popsection");
-#else
-#define kernel_thread_exit	do_exit
-#endif
-
-/*
- * Create a kernel thread.
- */
-pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
-{
-	struct pt_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-
-	regs.ARM_r4 = (unsigned long)arg;
-	regs.ARM_r5 = (unsigned long)fn;
-	regs.ARM_r6 = (unsigned long)kernel_thread_exit;
-	regs.ARM_r7 = SVC_MODE | PSR_ENDSTATE | PSR_ISETSTATE;
-	regs.ARM_pc = (unsigned long)kernel_thread_helper;
-	regs.ARM_cpsr = regs.ARM_r7 | PSR_I_BIT;
-
-	return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
 unsigned long get_wchan(struct task_struct *p)
 {
 	struct stackframe frame;
diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c
index 76cbb05..c2a898a 100644
--- a/arch/arm/kernel/sys_arm.c
+++ b/arch/arm/kernel/sys_arm.c
@@ -59,69 +59,6 @@
 	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->ARM_sp, regs, 0, NULL, NULL);
 }
 
-/* sys_execve() executes a new program.
- * This is called indirectly via a small wrapper
- */
-asmlinkage int sys_execve(const char __user *filenamei,
-			  const char __user *const __user *argv,
-			  const char __user *const __user *envp, struct pt_regs *regs)
-{
-	int error;
-	char * filename;
-
-	filename = getname(filenamei);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-	error = do_execve(filename, argv, envp, regs);
-	putname(filename);
-out:
-	return error;
-}
-
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	struct pt_regs regs;
-	int ret;
-
-	memset(&regs, 0, sizeof(struct pt_regs));
-	ret = do_execve(filename,
-			(const char __user *const __user *)argv,
-			(const char __user *const __user *)envp, &regs);
-	if (ret < 0)
-		goto out;
-
-	/*
-	 * Save argc to the register structure for userspace.
-	 */
-	regs.ARM_r0 = ret;
-
-	/*
-	 * We were successful.  We won't be returning to our caller, but
-	 * instead to user space by manipulating the kernel stack.
-	 */
-	asm(	"add	r0, %0, %1\n\t"
-		"mov	r1, %2\n\t"
-		"mov	r2, %3\n\t"
-		"bl	memmove\n\t"	/* copy regs to top of stack */
-		"mov	r8, #0\n\t"	/* not a syscall */
-		"mov	r9, %0\n\t"	/* thread structure */
-		"mov	sp, r0\n\t"	/* reposition stack pointer */
-		"b	ret_to_user"
-		:
-		: "r" (current_thread_info()),
-		  "Ir" (THREAD_START_SP - sizeof(regs)),
-		  "r" (&regs),
-		  "Ir" (sizeof(regs))
-		: "r0", "r1", "r2", "r3", "r8", "r9", "ip", "lr", "memory");
-
- out:
-	return ret;
-}
-EXPORT_SYMBOL(kernel_execve);
-
 /*
  * Since loff_t is a 64 bit type we avoid a lot of ABI hassle
  * with a different argument ordering.
diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild
index e3ba7bc..be0433e 100644
--- a/arch/avr32/include/asm/Kbuild
+++ b/arch/avr32/include/asm/Kbuild
@@ -1,5 +1,6 @@
 include include/asm-generic/Kbuild.asm
 
 generic-y	+= clkdev.h
+generic-y	+= exec.h
 
 header-y	+= cachectl.h
diff --git a/arch/avr32/include/asm/exec.h b/arch/avr32/include/asm/exec.h
deleted file mode 100644
index f467be8..0000000
--- a/arch/avr32/include/asm/exec.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * Copyright (C) 2004-2006 Atmel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#ifndef __ASM_AVR32_EXEC_H
-#define __ASM_AVR32_EXEC_H
-
-#define arch_align_stack(x)	(x)
-
-#endif /* __ASM_AVR32_EXEC_H */
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index 277f1a4..4e4e98d 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -12,6 +12,7 @@
 generic-y += dma.h
 generic-y += emergency-restart.h
 generic-y += errno.h
+generic-y += exec.h
 generic-y += fb.h
 generic-y += fcntl.h
 generic-y += futex.h
diff --git a/arch/c6x/include/asm/exec.h b/arch/c6x/include/asm/exec.h
deleted file mode 100644
index 0fea482..0000000
--- a/arch/c6x/include/asm/exec.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_C6X_EXEC_H
-#define _ASM_C6X_EXEC_H
-
-#define arch_align_stack(x) (x)
-
-#endif /* _ASM_C6X_EXEC_H */
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index a8eab26..ff1bf7f 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -9,3 +9,4 @@
 header-y += sync_serial.h
 
 generic-y += clkdev.h
+generic-y += exec.h
diff --git a/arch/cris/include/asm/exec.h b/arch/cris/include/asm/exec.h
deleted file mode 100644
index 9665dab..0000000
--- a/arch/cris/include/asm/exec.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_CRIS_EXEC_H
-#define __ASM_CRIS_EXEC_H
-
-#define arch_align_stack(x) (x)
-
-#endif /* __ASM_CRIS_EXEC_H */
diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index 13cd044..251bd71 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -3,3 +3,4 @@
 header-y += registers.h
 header-y += termios.h
 generic-y += clkdev.h
+generic-y += exec.h
diff --git a/arch/frv/include/asm/exec.h b/arch/frv/include/asm/exec.h
deleted file mode 100644
index 65c9130..0000000
--- a/arch/frv/include/asm/exec.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* FR-V CPU executable handling
- *
- * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _ASM_EXEC_H
-#define _ASM_EXEC_H
-
-#define arch_align_stack(x) (x)
-
-#endif /* _ASM_EXEC_H */
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index 0e152a9..fccd81e 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -1,3 +1,4 @@
 include include/asm-generic/Kbuild.asm
 
-generic-y	+= clkdev.h
+generic-y += clkdev.h
+generic-y += exec.h
diff --git a/arch/h8300/include/asm/exec.h b/arch/h8300/include/asm/exec.h
deleted file mode 100644
index c01c45c..0000000
--- a/arch/h8300/include/asm/exec.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _H8300_EXEC_H
-#define _H8300_EXEC_H
-
-#define arch_align_stack(x) (x)
-
-#endif /* _H8300_EXEC_H */
diff --git a/arch/hexagon/kernel/syscall.c b/arch/hexagon/kernel/syscall.c
index 620dd18..553cd60 100644
--- a/arch/hexagon/kernel/syscall.c
+++ b/arch/hexagon/kernel/syscall.c
@@ -87,4 +87,3 @@
 
 	return retval;
 }
-EXPORT_SYMBOL(kernel_execve);
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 58f3d14..562f593 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -14,3 +14,4 @@
 header-y += ucontext.h
 header-y += ustack.h
 generic-y += clkdev.h
+generic-y += exec.h
diff --git a/arch/ia64/include/asm/exec.h b/arch/ia64/include/asm/exec.h
deleted file mode 100644
index b262424..0000000
--- a/arch/ia64/include/asm/exec.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Process execution defines.
- *
- * Copyright (C) 1998-2003 Hewlett-Packard Co
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
- * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
- */
-#ifndef _ASM_IA64_EXEC_H
-#define _ASM_IA64_EXEC_H
-
-#define arch_align_stack(x) (x)
-
-#endif /* _ASM_IA64_EXEC_H */
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index 0e152a9..fccd81e 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -1,3 +1,4 @@
 include include/asm-generic/Kbuild.asm
 
-generic-y	+= clkdev.h
+generic-y += clkdev.h
+generic-y += exec.h
diff --git a/arch/m32r/include/asm/exec.h b/arch/m32r/include/asm/exec.h
deleted file mode 100644
index c805dbd..0000000
--- a/arch/m32r/include/asm/exec.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001  Hiroyuki Kondo, Hirokazu Takata, and Hitoshi Yamamoto
- * Copyright (C) 2004, 2006  Hirokazu Takata <takata at linux-m32r.org>
- */
-#ifndef _ASM_M32R_EXEC_H
-#define _ASM_M32R_EXEC_H
-
-#define arch_align_stack(x) (x)
-
-#endif /* _ASM_M32R_EXEC_H */
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index bfe675f..ecb5408 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -7,6 +7,7 @@
 generic-y += device.h
 generic-y += emergency-restart.h
 generic-y += errno.h
+generic-y += exec.h
 generic-y += futex.h
 generic-y += ioctl.h
 generic-y += ipcbuf.h
diff --git a/arch/m68k/include/asm/exec.h b/arch/m68k/include/asm/exec.h
deleted file mode 100644
index 0499adf..0000000
--- a/arch/m68k/include/asm/exec.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _M68K_EXEC_H
-#define _M68K_EXEC_H
-
-#define arch_align_stack(x) (x)
-
-#endif /* _M68K_EXEC_H */
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 48510f6..8653072 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -2,3 +2,4 @@
 
 header-y  += elf.h
 generic-y += clkdev.h
+generic-y += exec.h
diff --git a/arch/microblaze/include/asm/exec.h b/arch/microblaze/include/asm/exec.h
deleted file mode 100644
index e750de1..0000000
--- a/arch/microblaze/include/asm/exec.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (C) 2006 Atmark Techno, Inc.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#ifndef _ASM_MICROBLAZE_EXEC_H
-#define _ASM_MICROBLAZE_EXEC_H
-
-#define arch_align_stack(x) (x)
-
-#endif /* _ASM_MICROBLAZE_EXEC_H */
diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild
index 0d20f55..fccd81e 100644
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -1,3 +1,4 @@
 include include/asm-generic/Kbuild.asm
 
 generic-y += clkdev.h
+generic-y += exec.h
diff --git a/arch/mn10300/include/asm/exec.h b/arch/mn10300/include/asm/exec.h
deleted file mode 100644
index c74e367..0000000
--- a/arch/mn10300/include/asm/exec.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* MN10300 process execution definitions
- *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-#ifndef _ASM_EXEC_H
-#define _ASM_EXEC_H
-
-#define arch_align_stack(x) (x)
-
-#endif /* _ASM_EXEC_H */
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index 0587f62..458371a 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -3,3 +3,4 @@
 header-y += pdc.h
 generic-y += clkdev.h
 generic-y += word-at-a-time.h
+generic-y += exec.h
diff --git a/arch/parisc/include/asm/exec.h b/arch/parisc/include/asm/exec.h
deleted file mode 100644
index 6bb5af7..0000000
--- a/arch/parisc/include/asm/exec.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __PARISC_EXEC_H
-#define __PARISC_EXEC_H
-
-#define arch_align_stack(x) (x)
-
-#endif /* __PARISC_EXEC_H */
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index ceff7ae..99d2d79 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -135,6 +135,7 @@
 	select GENERIC_CLOCKEVENTS
 	select KTIME_SCALAR if 32BIT
 	select HAVE_ARCH_SECCOMP_FILTER
+	select GENERIC_KERNEL_THREAD
 
 config SCHED_OMIT_FRAME_POINTER
 	def_bool y
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 56831df..94e749c 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -35,6 +35,7 @@
 extern void s390_adjust_jiffies(void);
 extern const struct seq_operations cpuinfo_op;
 extern int sysctl_ieee_emulation_warnings;
+extern void execve_tail(void);
 
 /*
  * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
@@ -126,6 +127,7 @@
 	regs->psw.mask	= psw_user_bits | PSW_MASK_EA | PSW_MASK_BA;	\
 	regs->psw.addr	= new_psw | PSW_ADDR_AMODE;			\
 	regs->gprs[15]	= new_stackp;					\
+	execve_tail();							\
 } while (0)
 
 #define start_thread31(regs, new_psw, new_stackp) do {			\
@@ -135,6 +137,7 @@
 	__tlb_flush_mm(current->mm);					\
 	crst_table_downgrade(current->mm, 1UL << 31);			\
 	update_mm(current->mm, current);				\
+	execve_tail();							\
 } while (0)
 
 /* Forward declaration, a strange C thing */
@@ -150,7 +153,6 @@
 
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
-extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
 
 /*
  * Return saved PC of a blocked thread.
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 4e64b5c..8192e29 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -417,6 +417,8 @@
 #   define __ARCH_WANT_COMPAT_SYS_TIME
 #   define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 # endif
+#define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_KERNEL_EXECVE
 
 /*
  * "Conditional" syscalls
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 189963c..65cca95 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -432,32 +432,6 @@
 	return ret;
 }
 
-/*
- * sys32_execve() executes a new program after the asm stub has set
- * things up for us.  This should basically do what I want it to.
- */
-asmlinkage long sys32_execve(const char __user *name, compat_uptr_t __user *argv,
-			     compat_uptr_t __user *envp)
-{
-	struct pt_regs *regs = task_pt_regs(current);
-	char *filename;
-	long rc;
-
-	filename = getname(name);
-	rc = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		return rc;
-	rc = compat_do_execve(filename, argv, envp, regs);
-	if (rc)
-		goto out;
-	current->thread.fp_regs.fpc=0;
-	asm volatile("sfpc %0,0" : : "d" (0));
-	rc = regs->gprs[2];
-out:
-	putname(filename);
-	return rc;
-}
-
 asmlinkage long sys32_pread64(unsigned int fd, char __user *ubuf,
 				size_t count, u32 poshi, u32 poslo)
 {
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
index 90887bd..d4d0239 100644
--- a/arch/s390/kernel/compat_linux.h
+++ b/arch/s390/kernel/compat_linux.h
@@ -125,8 +125,6 @@
 			  compat_sigset_t __user *oset, size_t sigsetsize);
 long sys32_rt_sigpending(compat_sigset_t __user *set, size_t sigsetsize);
 long sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo);
-long sys32_execve(const char __user *name, compat_uptr_t __user *argv,
-		  compat_uptr_t __user *envp);
 long sys32_init_module(void __user *umod, unsigned long len,
 		       const char __user *uargs);
 long sys32_delete_module(const char __user *name_user, unsigned int flags);
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 3afba80..ad79b84 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1576,7 +1576,7 @@
 	llgtr	%r2,%r2			# char *
 	llgtr	%r3,%r3			# compat_uptr_t *
 	llgtr	%r4,%r4			# compat_uptr_t *
-	jg	sys32_execve		# branch to system call
+	jg	compat_sys_execve	# branch to system call
 
 ENTRY(sys_fanotify_init_wrapper)
 	llgfr	%r2,%r2			# unsigned int
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 870bad6..ef46f66 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -331,45 +331,38 @@
 	l	%r12,__LC_THREAD_INFO
 	l	%r13,__LC_SVC_NEW_PSW+4
 	tm	__PT_PSW+1(%r11),0x01	# forking a kernel thread ?
-	jo	0f
-	st	%r15,__PT_R15(%r11)	# store stack pointer for new kthread
-0:	l	%r1,BASED(.Lschedule_tail)
+	je	1f
+	l	%r1,BASED(.Lschedule_tail)
 	basr	%r14,%r1		# call schedule_tail
 	TRACE_IRQS_ON
 	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
 	j	sysc_tracenogo
 
+1:	# it's a kernel thread
+	st	%r15,__PT_R15(%r11)	# store stack pointer for new kthread
+	l	%r1,BASED(.Lschedule_tail)
+	basr	%r14,%r1		# call schedule_tail
+	TRACE_IRQS_ON
+	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
+	lm	%r9,%r11,__PT_R9(%r11)	# load gprs
+ENTRY(kernel_thread_starter)
+	la	%r2,0(%r10)
+	basr	%r14,%r9
+	la	%r2,0
+	br	%r11			# do_exit
+
 #
 # kernel_execve function needs to deal with pt_regs that is not
 # at the usual place
 #
-ENTRY(kernel_execve)
-	stm	%r12,%r15,48(%r15)
-	lr	%r14,%r15
-	l	%r13,__LC_SVC_NEW_PSW+4
-	ahi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	st	%r14,__SF_BACKCHAIN(%r15)
-	la	%r12,STACK_FRAME_OVERHEAD(%r15)
-	xc	0(__PT_SIZE,%r12),0(%r12)
-	l	%r1,BASED(.Ldo_execve)
-	lr	%r5,%r12
-	basr	%r14,%r1		# call do_execve
-	ltr	%r2,%r2
-	je	0f
-	ahi	%r15,(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	lm	%r12,%r15,48(%r15)
-	br	%r14
-	# execve succeeded.
-0:	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
-	l	%r15,__LC_KERNEL_STACK	# load ksp
-	ahi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	la	%r11,STACK_FRAME_OVERHEAD(%r15)
-	mvc	0(__PT_SIZE,%r11),0(%r12)	# copy pt_regs
-	l	%r12,__LC_THREAD_INFO
+ENTRY(ret_from_kernel_execve)
+	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
+	lr	%r15,%r2
+	lr	%r11,%r2
+	ahi	%r15,-STACK_FRAME_OVERHEAD
 	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
+	l	%r12,__LC_THREAD_INFO
 	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
-	l	%r1,BASED(.Lexecve_tail)
-	basr	%r14,%r1		# call execve_tail
 	j	sysc_return
 
 /*
@@ -931,8 +924,6 @@
 .Ldo_signal:		.long	do_signal
 .Ldo_notify_resume:	.long	do_notify_resume
 .Ldo_per_trap:		.long	do_per_trap
-.Ldo_execve:		.long	do_execve
-.Lexecve_tail:		.long	execve_tail
 .Ljump_table:		.long	pgm_check_table
 .Lschedule:		.long	schedule
 #ifdef CONFIG_PREEMPT
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index a5f4dc4..d0d3f69 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -58,9 +58,6 @@
 long sys_clone(unsigned long newsp, unsigned long clone_flags,
 	       int __user *parent_tidptr, int __user *child_tidptr);
 long sys_vfork(void);
-void execve_tail(void);
-long sys_execve(const char __user *name, const char __user *const __user *argv,
-		const char __user *const __user *envp);
 long sys_sigsuspend(int history0, int history1, old_sigset_t mask);
 long sys_sigaction(int sig, const struct old_sigaction __user *act,
 		   struct old_sigaction __user *oact);
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 7549985..f9761f8 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -353,41 +353,31 @@
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	lg	%r12,__LC_THREAD_INFO
 	tm	__PT_PSW+1(%r11),0x01	# forking a kernel thread ?
-	jo	0f
-	stg	%r15,__PT_R15(%r11)	# store stack pointer for new kthread
-0:	brasl	%r14,schedule_tail
+	je	1f
+	brasl	%r14,schedule_tail
 	TRACE_IRQS_ON
 	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
 	j	sysc_tracenogo
-
-#
-# kernel_execve function needs to deal with pt_regs that is not
-# at the usual place
-#
-ENTRY(kernel_execve)
-	stmg	%r12,%r15,96(%r15)
-	lgr	%r14,%r15
-	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	stg	%r14,__SF_BACKCHAIN(%r15)
-	la	%r12,STACK_FRAME_OVERHEAD(%r15)
-	xc	0(__PT_SIZE,%r12),0(%r12)
-	lgr	%r5,%r12
-	brasl	%r14,do_execve
-	ltgfr	%r2,%r2
-	je	0f
-	aghi	%r15,(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	lmg	%r12,%r15,96(%r15)
-	br	%r14
-	# execve succeeded.
-0:	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
-	lg	%r15,__LC_KERNEL_STACK	# load ksp
-	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	la	%r11,STACK_FRAME_OVERHEAD(%r15)
-	mvc	0(__PT_SIZE,%r11),0(%r12)	# copy pt_regs
-	lg	%r12,__LC_THREAD_INFO
-	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+1:	# it's a kernel thread
+	stg	%r15,__PT_R15(%r11)	# store stack pointer for new kthread
+	brasl	%r14,schedule_tail
+	TRACE_IRQS_ON
 	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
-	brasl	%r14,execve_tail
+	lmg	%r9,%r11,__PT_R9(%r11)	# load gprs
+ENTRY(kernel_thread_starter)
+	la	%r2,0(%r10)
+	basr	%r14,%r9
+	la	%r2,0
+	br	%r11			# do_exit
+
+ENTRY(ret_from_kernel_execve)
+	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
+	lgr	%r15,%r2
+	lgr	%r11,%r2
+	aghi	%r15,-STACK_FRAME_OVERHEAD
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+	lg	%r12,__LC_THREAD_INFO
+	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
 	j	sysc_return
 
 /*
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 5024be2..cd31ad4 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -100,35 +100,6 @@
 
 extern void __kprobes kernel_thread_starter(void);
 
-asm(
-	".section .kprobes.text, \"ax\"\n"
-	".global kernel_thread_starter\n"
-	"kernel_thread_starter:\n"
-	"    la    2,0(10)\n"
-	"    basr  14,9\n"
-	"    la    2,0\n"
-	"    br    11\n"
-	".previous\n");
-
-int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-{
-	struct pt_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-	regs.psw.mask = psw_kernel_bits |
-		PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
-	regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE;
-	regs.gprs[9] = (unsigned long) fn;
-	regs.gprs[10] = (unsigned long) arg;
-	regs.gprs[11] = (unsigned long) do_exit;
-	regs.orig_gpr2 = -1;
-
-	/* Ok, create the new process.. */
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED,
-		       0, &regs, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
 /*
  * Free current thread data structures etc..
  */
@@ -146,7 +117,7 @@
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
-		unsigned long unused,
+		unsigned long arg,
 		struct task_struct *p, struct pt_regs *regs)
 {
 	struct thread_info *ti;
@@ -158,20 +129,44 @@
 
 	frame = container_of(task_pt_regs(p), struct fake_frame, childregs);
 	p->thread.ksp = (unsigned long) frame;
-	/* Store access registers to kernel stack of new process. */
-	frame->childregs = *regs;
-	frame->childregs.gprs[2] = 0;	/* child returns 0 on fork. */
-	frame->childregs.gprs[15] = new_stackp;
-	frame->sf.back_chain = 0;
+	/* Save access registers to new thread structure. */
+	save_access_regs(&p->thread.acrs[0]);
+	/* start new process with ar4 pointing to the correct address space */
+	p->thread.mm_segment = get_fs();
+	/* Don't copy debug registers */
+	memset(&p->thread.per_user, 0, sizeof(p->thread.per_user));
+	memset(&p->thread.per_event, 0, sizeof(p->thread.per_event));
+	clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
+	clear_tsk_thread_flag(p, TIF_PER_TRAP);
+	/* Initialize per thread user and system timer values */
+	ti = task_thread_info(p);
+	ti->user_timer = 0;
+	ti->system_timer = 0;
 
+	frame->sf.back_chain = 0;
 	/* new return point is ret_from_fork */
 	frame->sf.gprs[8] = (unsigned long) ret_from_fork;
-
 	/* fake return stack for resume(), don't go back to schedule */
 	frame->sf.gprs[9] = (unsigned long) frame;
 
-	/* Save access registers to new thread structure. */
-	save_access_regs(&p->thread.acrs[0]);
+	/* Store access registers to kernel stack of new process. */
+	if (unlikely(!regs)) {
+		/* kernel thread */
+		memset(&frame->childregs, 0, sizeof(struct pt_regs));
+		frame->childregs.psw.mask = psw_kernel_bits | PSW_MASK_DAT |
+				PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+		frame->childregs.psw.addr = PSW_ADDR_AMODE |
+				(unsigned long) kernel_thread_starter;
+		frame->childregs.gprs[9] = new_stackp; /* function */
+		frame->childregs.gprs[10] = arg;
+		frame->childregs.gprs[11] = (unsigned long) do_exit;
+		frame->childregs.orig_gpr2 = -1;
+
+		return 0;
+	}
+	frame->childregs = *regs;
+	frame->childregs.gprs[2] = 0;	/* child returns 0 on fork. */
+	frame->childregs.gprs[15] = new_stackp;
 
 	/* Don't copy runtime instrumentation info */
 	p->thread.ri_cb = NULL;
@@ -202,17 +197,6 @@
 		}
 	}
 #endif /* CONFIG_64BIT */
-	/* start new process with ar4 pointing to the correct address space */
-	p->thread.mm_segment = get_fs();
-	/* Don't copy debug registers */
-	memset(&p->thread.per_user, 0, sizeof(p->thread.per_user));
-	memset(&p->thread.per_event, 0, sizeof(p->thread.per_event));
-	clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
-	clear_tsk_thread_flag(p, TIF_PER_TRAP);
-	/* Initialize per thread user and system timer values */
-	ti = task_thread_info(p);
-	ti->user_timer = 0;
-	ti->system_timer = 0;
 	return 0;
 }
 
@@ -258,31 +242,6 @@
 }
 
 /*
- * sys_execve() executes a new program.
- */
-SYSCALL_DEFINE3(execve, const char __user *, name,
-		const char __user *const __user *, argv,
-		const char __user *const __user *, envp)
-{
-	struct pt_regs *regs = task_pt_regs(current);
-	char *filename;
-	long rc;
-
-	filename = getname(name);
-	rc = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		return rc;
-	rc = do_execve(filename, argv, envp, regs);
-	if (rc)
-		goto out;
-	execve_tail();
-	rc = regs->gprs[2];
-out:
-	putname(filename);
-	return rc;
-}
-
-/*
  * fill in the FPU structure for a core dump.
  */
 int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 7b673dd..86eadce 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -7,6 +7,7 @@
 generic-y += div64.h
 generic-y += emergency-restart.h
 generic-y += errno.h
+generic-y += exec.h
 generic-y += fcntl.h
 generic-y += ioctl.h
 generic-y += ipcbuf.h
diff --git a/arch/sh/include/asm/exec.h b/arch/sh/include/asm/exec.h
deleted file mode 100644
index 69486a9..0000000
--- a/arch/sh/include/asm/exec.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/*
- * Copyright (C) 1999, 2000  Niibe Yutaka  &  Kaz Kojima
- * Copyright (C) 2002 Paul Mundt
- */
-#ifndef __ASM_SH_EXEC_H
-#define __ASM_SH_EXEC_H
-
-#define arch_align_stack(x) (x)
-
-#endif /* __ASM_SH_EXEC_H */
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 61d41c1..10d54e5 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -3,6 +3,7 @@
 
 generic-y += clkdev.h
 generic-y += div64.h
+generic-y += exec.h
 generic-y += local64.h
 generic-y += irq_regs.h
 generic-y += local.h
diff --git a/arch/sparc/include/asm/exec.h b/arch/sparc/include/asm/exec.h
deleted file mode 100644
index 2e08588..0000000
--- a/arch/sparc/include/asm/exec.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __SPARC_EXEC_H
-#define __SPARC_EXEC_H
-
-#define arch_align_stack(x) (x)
-
-#endif /* __SPARC_EXEC_H */
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index ea2e8ea..5cd98fa 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -13,6 +13,7 @@
 generic-y += div64.h
 generic-y += emergency-restart.h
 generic-y += errno.h
+generic-y += exec.h
 generic-y += fb.h
 generic-y += fcntl.h
 generic-y += ioctl.h
diff --git a/arch/tile/include/asm/exec.h b/arch/tile/include/asm/exec.h
deleted file mode 100644
index a714e19..0000000
--- a/arch/tile/include/asm/exec.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- *   This program is free software; you can redistribute it and/or
- *   modify it under the terms of the GNU General Public License
- *   as published by the Free Software Foundation, version 2.
- *
- *   This program is distributed in the hope that it will be useful, but
- *   WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- *   NON INFRINGEMENT.  See the GNU General Public License for
- *   more details.
- */
-
-#ifndef _ASM_TILE_EXEC_H
-#define _ASM_TILE_EXEC_H
-
-#define arch_align_stack(x) (x)
-
-#endif /* _ASM_TILE_EXEC_H */
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 24e97be..1e82e95 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -63,8 +63,6 @@
 {
 }
 
-extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
-
 extern unsigned long thread_saved_pc(struct task_struct *t);
 
 static inline void mm_copy_segments(struct mm_struct *from_mm,
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index de66c42..ab019c7 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -16,7 +16,6 @@
 #include <mem_user.h>
 #include <skas.h>
 #include <os.h>
-#include "internal.h"
 
 void flush_thread(void)
 {
@@ -49,27 +48,7 @@
 }
 EXPORT_SYMBOL(start_thread);
 
-long um_execve(const char *file, const char __user *const __user *argv, const char __user *const __user *env)
+void __noreturn ret_from_kernel_execve(struct pt_regs *unused)
 {
-	long err;
-
-	err = do_execve(file, argv, env, &current->thread.regs);
-	if (!err)
-		UML_LONGJMP(current->thread.exec_buf, 1);
-	return err;
-}
-
-long sys_execve(const char __user *file, const char __user *const __user *argv,
-		const char __user *const __user *env)
-{
-	long error;
-	char *filename;
-
-	filename = getname(file);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename)) goto out;
-	error = do_execve(filename, argv, env, &current->thread.regs);
-	putname(filename);
- out:
-	return error;
+	UML_LONGJMP(current->thread.exec_buf, 1);
 }
diff --git a/arch/um/kernel/internal.h b/arch/um/kernel/internal.h
deleted file mode 100644
index 5bf97db..0000000
--- a/arch/um/kernel/internal.h
+++ /dev/null
@@ -1 +0,0 @@
-extern long um_execve(const char *file, const char __user *const __user *argv, const char __user *const __user *env);
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 41f5324..3062978 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -69,18 +69,6 @@
 	return page;
 }
 
-int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-{
-	int pid;
-
-	current->thread.request.u.thread.proc = fn;
-	current->thread.request.u.thread.arg = arg;
-	pid = do_fork(CLONE_VM | CLONE_UNTRACED | flags, 0,
-		      &current->thread.regs, 0, NULL, NULL);
-	return pid;
-}
-EXPORT_SYMBOL(kernel_thread);
-
 static inline void set_current(struct task_struct *task)
 {
 	cpu_tasks[task_thread_info(task)->cpu] = ((struct cpu_task)
@@ -177,7 +165,7 @@
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long stack_top, struct task_struct * p,
+		unsigned long arg, struct task_struct * p,
 		struct pt_regs *regs)
 {
 	void (*handler)(void);
@@ -198,7 +186,8 @@
 		arch_copy_thread(&current->thread.arch, &p->thread.arch);
 	} else {
 		get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp);
-		p->thread.request.u.thread = current->thread.request.u.thread;
+		p->thread.request.u.thread.proc = (int (*)(void *))sp;
+		p->thread.request.u.thread.arg = (void *)arg;
 		handler = new_thread_handler;
 	}
 
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
index 10808bd..a81f370 100644
--- a/arch/um/kernel/syscall.c
+++ b/arch/um/kernel/syscall.c
@@ -13,7 +13,6 @@
 #include <asm/mman.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
-#include "internal.h"
 
 long sys_fork(void)
 {
@@ -50,19 +49,3 @@
  out:
 	return err;
 }
-
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	mm_segment_t fs;
-	int ret;
-
-	fs = get_fs();
-	set_fs(KERNEL_DS);
-	ret = um_execve(filename, (const char __user *const __user *)argv,
-			(const char __user *const __user *) envp);
-	set_fs(fs);
-
-	return ret;
-}
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index 123c59a..c910c98 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -11,6 +11,7 @@
 generic-y += div64.h
 generic-y += emergency-restart.h
 generic-y += errno.h
+generic-y += exec.h
 generic-y += fb.h
 generic-y += fcntl.h
 generic-y += ftrace.h
diff --git a/arch/unicore32/include/asm/exec.h b/arch/unicore32/include/asm/exec.h
deleted file mode 100644
index 06d1f0f..0000000
--- a/arch/unicore32/include/asm/exec.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Process execution bits for PKUnity SoC and UniCore ISA
- *
- * Copyright (C) 2001-2012 GUAN Xue-tao
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#ifndef __UNICORE_EXEC_H__
-#define __UNICORE_EXEC_H__
-
-#define arch_align_stack(x)		(x)
-
-#endif /* __UNICORE_EXEC_H__ */
diff --git a/arch/unicore32/kernel/sys.c b/arch/unicore32/kernel/sys.c
index 3afe60a..5fd9af7 100644
--- a/arch/unicore32/kernel/sys.c
+++ b/arch/unicore32/kernel/sys.c
@@ -104,7 +104,6 @@
  out:
 	return ret;
 }
-EXPORT_SYMBOL(kernel_execve);
 
 /* Note: used by the compat code even in 64-bit Linux. */
 SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1ae94bc..42d2c35 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -108,6 +108,7 @@
 	select GENERIC_STRNLEN_USER
 	select HAVE_RCU_USER_QS if X86_64
 	select HAVE_IRQ_TIME_ACCOUNTING
+	select GENERIC_KERNEL_THREAD
 
 config INSTRUCTION_DECODER
 	def_bool y
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 9c28950..076745f 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -465,7 +465,7 @@
 	PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
 	PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
 	PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
-	PTREGSCALL stub32_execve, sys32_execve, %rcx
+	PTREGSCALL stub32_execve, compat_sys_execve, %rcx
 	PTREGSCALL stub32_fork, sys_fork, %rdi
 	PTREGSCALL stub32_clone, sys32_clone, %rdx
 	PTREGSCALL stub32_vfork, sys_vfork, %rdi
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index c5b938d..86d68d1 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -385,21 +385,6 @@
 	return ret;
 }
 
-asmlinkage long sys32_execve(const char __user *name, compat_uptr_t __user *argv,
-			     compat_uptr_t __user *envp, struct pt_regs *regs)
-{
-	long error;
-	char *filename;
-
-	filename = getname(name);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		return error;
-	error = compat_do_execve(filename, argv, envp, regs);
-	putname(filename);
-	return error;
-}
-
 asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp,
 			    struct pt_regs *regs)
 {
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index b98c0d9..ad1fc85 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -588,11 +588,6 @@
 } mm_segment_t;
 
 
-/*
- * create a kernel thread without removing it from tasklists
- */
-extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index 4ca1c61..a9a8cf3 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -54,8 +54,6 @@
 asmlinkage long sys32_personality(unsigned long);
 asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32);
 
-asmlinkage long sys32_execve(const char __user *, compat_uptr_t __user *,
-			     compat_uptr_t __user *, struct pt_regs *);
 asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *);
 
 long sys32_lseek(unsigned int, int, unsigned int);
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index f1d8b44..2be0b88 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -25,7 +25,7 @@
 int sys_vfork(struct pt_regs *);
 long sys_execve(const char __user *,
 		const char __user *const __user *,
-		const char __user *const __user *, struct pt_regs *);
+		const char __user *const __user *);
 long sys_clone(unsigned long, unsigned long, void __user *,
 	       void __user *, struct pt_regs *);
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index c535d847..2d946e6 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -79,7 +79,6 @@
 #define TIF_SIGPENDING		2	/* signal pending */
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_SINGLESTEP		4	/* reenable singlestep on user return*/
-#define TIF_IRET		5	/* force IRET */
 #define TIF_SYSCALL_EMU		6	/* syscall emulation active */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
@@ -105,7 +104,6 @@
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
-#define _TIF_IRET		(1 << TIF_IRET)
 #define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 0d9776e..55d1555 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -50,6 +50,8 @@
 # define __ARCH_WANT_SYS_TIME
 # define __ARCH_WANT_SYS_UTIME
 # define __ARCH_WANT_SYS_WAITPID
+# define __ARCH_WANT_SYS_EXECVE
+# define __ARCH_WANT_KERNEL_EXECVE
 
 /*
  * "Conditional" syscalls
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a48ea05..91ce48f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,7 +23,7 @@
 obj-y			+= setup.o x86_init.o i8259.o irqinit.o jump_label.o
 obj-$(CONFIG_IRQ_WORK)  += irq_work.o
 obj-y			+= probe_roms.o
-obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
+obj-$(CONFIG_X86_32)	+= i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-y			+= syscall_$(BITS).o
 obj-$(CONFIG_X86_64)	+= vsyscall_64.o
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 68de2dc..2861082 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -69,4 +69,7 @@
 	OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
 	OFFSET(BP_pref_address, boot_params, hdr.pref_address);
 	OFFSET(BP_code32_start, boot_params, hdr.code32_start);
+
+	BLANK();
+	DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
 }
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 0750e3b..8f9ed1a 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -299,6 +299,13 @@
 	CFI_ENDPROC
 END(ret_from_fork)
 
+ENTRY(ret_from_kernel_execve)
+	movl %eax, %esp
+	movl $0,PT_EAX(%esp)
+	GET_THREAD_INFO(%ebp)
+	jmp syscall_exit
+END(ret_from_kernel_execve)
+
 /*
  * Interrupt exit functions should be protected against kprobes
  */
@@ -323,8 +330,7 @@
 	andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
 #else
 	/*
-	 * We can be coming here from a syscall done in the kernel space,
-	 * e.g. a failed kernel_execve().
+	 * We can be coming here from child spawned by kernel_thread().
 	 */
 	movl PT_CS(%esp), %eax
 	andl $SEGMENT_RPL_MASK, %eax
@@ -732,7 +738,6 @@
 PTREGSCALL1(iopl)
 PTREGSCALL0(fork)
 PTREGSCALL0(vfork)
-PTREGSCALL3(execve)
 PTREGSCALL2(sigaltstack)
 PTREGSCALL0(sigreturn)
 PTREGSCALL0(rt_sigreturn)
@@ -1015,15 +1020,20 @@
  */
 	.popsection
 
-ENTRY(kernel_thread_helper)
-	pushl $0		# fake return address for unwinder
+ENTRY(ret_from_kernel_thread)
 	CFI_STARTPROC
-	movl %edi,%eax
-	call *%esi
+	pushl_cfi %eax
+	call schedule_tail
+	GET_THREAD_INFO(%ebp)
+	popl_cfi %eax
+	pushl_cfi $0x0202		# Reset kernel eflags
+	popfl_cfi
+	movl PT_EBP(%esp),%eax
+	call *PT_EBX(%esp)
 	call do_exit
 	ud2			# padding for call trace
 	CFI_ENDPROC
-ENDPROC(kernel_thread_helper)
+ENDPROC(ret_from_kernel_thread)
 
 #ifdef CONFIG_XEN
 /* Xen doesn't set %esp to be precisely what the normal sysenter
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 44531ac..cdc790c 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -554,7 +554,7 @@
 	RESTORE_REST
 
 	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
-	jz   retint_restore_args
+	jz   1f
 
 	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
 	jnz  int_ret_from_sys_call
@@ -562,6 +562,16 @@
 	RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
 	jmp ret_from_sys_call			# go to the SYSRET fastpath
 
+1:
+	subq $REST_SKIP, %rsp	# move the stack pointer back
+	CFI_ADJUST_CFA_OFFSET	REST_SKIP
+	movq %rbp, %rdi
+	call *%rbx
+	# exit
+	mov %eax, %edi
+	call do_exit
+	ud2			# padding for call trace
+
 	CFI_ENDPROC
 END(ret_from_fork)
 
@@ -862,7 +872,6 @@
 	PARTIAL_FRAME 0
 	SAVE_REST
 	FIXUP_TOP_OF_STACK %r11
-	movq %rsp, %rcx
 	call sys_execve
 	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
@@ -912,8 +921,7 @@
 	PARTIAL_FRAME 0
 	SAVE_REST
 	FIXUP_TOP_OF_STACK %r11
-	movq %rsp, %rcx
-	call sys32_execve
+	call compat_sys_execve
 	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
 	RESTORE_REST
@@ -1318,51 +1326,19 @@
 	jmp  2b
 	.previous
 
-ENTRY(kernel_thread_helper)
-	pushq $0		# fake return address
-	CFI_STARTPROC
-	/*
-	 * Here we are in the child and the registers are set as they were
-	 * at kernel_thread() invocation in the parent.
-	 */
-	call *%rsi
-	# exit
-	mov %eax, %edi
-	call do_exit
-	ud2			# padding for call trace
-	CFI_ENDPROC
-END(kernel_thread_helper)
-
-/*
- * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
- *
- * C extern interface:
- *	 extern long execve(const char *name, char **argv, char **envp)
- *
- * asm input arguments:
- *	rdi: name, rsi: argv, rdx: envp
- *
- * We want to fallback into:
- *	extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs)
- *
- * do_sys_execve asm fallback arguments:
- *	rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
- */
-ENTRY(kernel_execve)
-	CFI_STARTPROC
-	FAKE_STACK_FRAME $0
-	SAVE_ALL
-	movq %rsp,%rcx
-	call sys_execve
-	movq %rax, RAX(%rsp)
-	RESTORE_REST
-	testq %rax,%rax
-	je int_ret_from_sys_call
-	RESTORE_ARGS
-	UNFAKE_STACK_FRAME
-	ret
-	CFI_ENDPROC
-END(kernel_execve)
+ENTRY(ret_from_kernel_execve)
+	movq %rdi, %rsp
+	movl $0, RAX(%rsp)
+	// RESTORE_REST
+	movq 0*8(%rsp), %r15
+	movq 1*8(%rsp), %r14
+	movq 2*8(%rsp), %r13
+	movq 3*8(%rsp), %r12
+	movq 4*8(%rsp), %rbp
+	movq 5*8(%rsp), %rbx
+	addq $(6*8), %rsp
+	jmp int_ret_from_sys_call
+END(ret_from_kernel_execve)
 
 /* Call softirq on interrupt stack. Interrupts are off. */
 ENTRY(call_softirq)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index dc3567e..b644e1c 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -293,71 +293,6 @@
 }
 
 /*
- * This gets run with %si containing the
- * function to call, and %di containing
- * the "args".
- */
-extern void kernel_thread_helper(void);
-
-/*
- * Create a kernel thread
- */
-int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
-{
-	struct pt_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-
-	regs.si = (unsigned long) fn;
-	regs.di = (unsigned long) arg;
-
-#ifdef CONFIG_X86_32
-	regs.ds = __USER_DS;
-	regs.es = __USER_DS;
-	regs.fs = __KERNEL_PERCPU;
-	regs.gs = __KERNEL_STACK_CANARY;
-#else
-	regs.ss = __KERNEL_DS;
-#endif
-
-	regs.orig_ax = -1;
-	regs.ip = (unsigned long) kernel_thread_helper;
-	regs.cs = __KERNEL_CS | get_kernel_rpl();
-	regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
-
-	/* Ok, create the new process.. */
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
-/*
- * sys_execve() executes a new program.
- */
-long sys_execve(const char __user *name,
-		const char __user *const __user *argv,
-		const char __user *const __user *envp, struct pt_regs *regs)
-{
-	long error;
-	char *filename;
-
-	filename = getname(name);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		return error;
-	error = do_execve(filename, argv, envp, regs);
-
-#ifdef CONFIG_X86_32
-	if (error == 0) {
-		/* Make sure we don't return using sysenter.. */
-                set_thread_flag(TIF_IRET);
-        }
-#endif
-
-	putname(filename);
-	return error;
-}
-
-/*
  * Idle related variables and functions
  */
 unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index b9ff83c..44e0bff 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -57,6 +57,7 @@
 #include <asm/switch_to.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
 
 /*
  * Return saved PC of a blocked thread.
@@ -127,23 +128,39 @@
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-	unsigned long unused,
+	unsigned long arg,
 	struct task_struct *p, struct pt_regs *regs)
 {
-	struct pt_regs *childregs;
+	struct pt_regs *childregs = task_pt_regs(p);
 	struct task_struct *tsk;
 	int err;
 
-	childregs = task_pt_regs(p);
-	*childregs = *regs;
-	childregs->ax = 0;
-	childregs->sp = sp;
-
 	p->thread.sp = (unsigned long) childregs;
 	p->thread.sp0 = (unsigned long) (childregs+1);
 
-	p->thread.ip = (unsigned long) ret_from_fork;
+	if (unlikely(!regs)) {
+		/* kernel thread */
+		memset(childregs, 0, sizeof(struct pt_regs));
+		p->thread.ip = (unsigned long) ret_from_kernel_thread;
+		task_user_gs(p) = __KERNEL_STACK_CANARY;
+		childregs->ds = __USER_DS;
+		childregs->es = __USER_DS;
+		childregs->fs = __KERNEL_PERCPU;
+		childregs->bx = sp;	/* function */
+		childregs->bp = arg;
+		childregs->orig_ax = -1;
+		childregs->cs = __KERNEL_CS | get_kernel_rpl();
+		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+		p->fpu_counter = 0;
+		p->thread.io_bitmap_ptr = NULL;
+		memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+		return 0;
+	}
+	*childregs = *regs;
+	childregs->ax = 0;
+	childregs->sp = sp;
 
+	p->thread.ip = (unsigned long) ret_from_fork;
 	task_user_gs(p) = get_user_gs(regs);
 
 	p->fpu_counter = 0;
@@ -190,6 +207,12 @@
 	regs->cs		= __USER_CS;
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
+	regs->flags		= X86_EFLAGS_IF;
+	/*
+	 * force it to the iret return path by making it look as if there was
+	 * some work pending.
+	 */
+	set_thread_flag(TIF_NOTIFY_RESUME);
 }
 EXPORT_SYMBOL_GPL(start_thread);
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 8a6d20c..16c6365 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -146,29 +146,18 @@
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long unused,
+		unsigned long arg,
 	struct task_struct *p, struct pt_regs *regs)
 {
 	int err;
 	struct pt_regs *childregs;
 	struct task_struct *me = current;
 
-	childregs = ((struct pt_regs *)
-			(THREAD_SIZE + task_stack_page(p))) - 1;
-	*childregs = *regs;
-
-	childregs->ax = 0;
-	if (user_mode(regs))
-		childregs->sp = sp;
-	else
-		childregs->sp = (unsigned long)childregs;
-
+	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
+	childregs = task_pt_regs(p);
 	p->thread.sp = (unsigned long) childregs;
-	p->thread.sp0 = (unsigned long) (childregs+1);
 	p->thread.usersp = me->thread.usersp;
-
 	set_tsk_thread_flag(p, TIF_FORK);
-
 	p->fpu_counter = 0;
 	p->thread.io_bitmap_ptr = NULL;
 
@@ -178,6 +167,24 @@
 	p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
 	savesegment(es, p->thread.es);
 	savesegment(ds, p->thread.ds);
+	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
+	if (unlikely(!regs)) {
+		/* kernel thread */
+		memset(childregs, 0, sizeof(struct pt_regs));
+		childregs->sp = (unsigned long)childregs;
+		childregs->ss = __KERNEL_DS;
+		childregs->bx = sp; /* function */
+		childregs->bp = arg;
+		childregs->orig_ax = -1;
+		childregs->cs = __KERNEL_CS | get_kernel_rpl();
+		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+		return 0;
+	}
+	*childregs = *regs;
+
+	childregs->ax = 0;
+	childregs->sp = sp;
 
 	err = -ENOMEM;
 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b33144c..29ad351 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -840,10 +840,6 @@
 	if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
 		fire_user_return_notifiers();
 
-#ifdef CONFIG_X86_32
-	clear_thread_flag(TIF_IRET);
-#endif /* CONFIG_X86_32 */
-
 	rcu_user_enter();
 }
 
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
deleted file mode 100644
index 0b0cb5f..0000000
--- a/arch/x86/kernel/sys_i386_32.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * This file contains various random system calls that
- * have a non-standard calling sequence on the Linux/i386
- * platform.
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/smp.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
-#include <linux/shm.h>
-#include <linux/stat.h>
-#include <linux/syscalls.h>
-#include <linux/mman.h>
-#include <linux/file.h>
-#include <linux/utsname.h>
-#include <linux/ipc.h>
-
-#include <linux/uaccess.h>
-#include <linux/unistd.h>
-
-#include <asm/syscalls.h>
-
-/*
- * Do a system call from kernel instead of calling sys_execve so we
- * end up with proper pt_regs.
- */
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	long __res;
-	asm volatile ("int $0x80"
-	: "=a" (__res)
-	: "0" (__NR_execve), "b" (filename), "c" (argv), "d" (envp) : "memory");
-	return __res;
-}
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 54abcc0..5c9687b 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -561,9 +561,9 @@
 		if ((trapno == 3) || (trapno == 1)) {
 			KVM86->regs32->ax = VM86_TRAP + (trapno << 8);
 			/* setting this flag forces the code in entry_32.S to
-			   call save_v86_state() and change the stack pointer
-			   to KVM86->regs32 */
-			set_thread_flag(TIF_IRET);
+			   the path where we call save_v86_state() and change
+			   the stack pointer to KVM86->regs32 */
+			set_thread_flag(TIF_NOTIFY_RESUME);
 			return 0;
 		}
 		do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 7a35a6e..a47103f 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -17,7 +17,7 @@
 8	i386	creat			sys_creat
 9	i386	link			sys_link
 10	i386	unlink			sys_unlink
-11	i386	execve			ptregs_execve			stub32_execve
+11	i386	execve			sys_execve			stub32_execve
 12	i386	chdir			sys_chdir
 13	i386	time			sys_time			compat_sys_time
 14	i386	mknod			sys_mknod
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index aeaff8b..30c4eec 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -13,6 +13,7 @@
 config UML_X86
 	def_bool y
 	select GENERIC_FIND_FIRST_BIT
+	select GENERIC_KERNEL_THREAD
 
 config 64BIT
 	bool "64-bit kernel" if SUBARCH = "x86"
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index b5408ce..232e605 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -25,7 +25,6 @@
 #define old_mmap sys_old_mmap
 
 #define ptregs_fork sys_fork
-#define ptregs_execve sys_execve
 #define ptregs_iopl sys_iopl
 #define ptregs_vm86old sys_vm86old
 #define ptregs_clone i386_clone
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 0d20f55..fccd81e 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -1,3 +1,4 @@
 include include/asm-generic/Kbuild.asm
 
 generic-y += clkdev.h
+generic-y += exec.h
diff --git a/arch/xtensa/include/asm/exec.h b/arch/xtensa/include/asm/exec.h
deleted file mode 100644
index af949e2..0000000
--- a/arch/xtensa/include/asm/exec.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- */
-
-#ifndef _XTENSA_EXEC_H
-#define _XTENSA_EXEC_H
-
-#define arch_align_stack(x) (x)
-
-#endif /* _XTENSA_EXEC_H */
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index e800dec..fbd9f60 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -36,7 +36,6 @@
 #include <asm/uaccess.h>
 #include <asm/param.h>
 #include <asm/page.h>
-#include <asm/exec.h>
 
 #ifndef user_long_t
 #define user_long_t long
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 262db11..a460491 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -39,7 +39,6 @@
 #include <asm/uaccess.h>
 #include <asm/param.h>
 #include <asm/pgalloc.h>
-#include <asm/exec.h>
 
 typedef char *elf_caddr_t;
 
diff --git a/fs/exec.c b/fs/exec.c
index 4f2bebc..ca43453 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -59,7 +59,6 @@
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/tlb.h>
-#include <asm/exec.h>
 
 #include <trace/events/task.h>
 #include "internal.h"
@@ -392,7 +391,7 @@
 	union {
 		const char __user *const __user *native;
 #ifdef CONFIG_COMPAT
-		compat_uptr_t __user *compat;
+		const compat_uptr_t __user *compat;
 #endif
 	} ptr;
 };
@@ -1574,9 +1573,9 @@
 }
 
 #ifdef CONFIG_COMPAT
-int compat_do_execve(char *filename,
-	compat_uptr_t __user *__argv,
-	compat_uptr_t __user *__envp,
+int compat_do_execve(const char *filename,
+	const compat_uptr_t __user *__argv,
+	const compat_uptr_t __user *__envp,
 	struct pt_regs *regs)
 {
 	struct user_arg_ptr argv = {
@@ -1658,3 +1657,55 @@
 {
 	return __get_dumpable(mm->flags);
 }
+
+#ifdef __ARCH_WANT_SYS_EXECVE
+SYSCALL_DEFINE3(execve,
+		const char __user *, filename,
+		const char __user *const __user *, argv,
+		const char __user *const __user *, envp)
+{
+	const char *path = getname(filename);
+	int error = PTR_ERR(path);
+	if (!IS_ERR(path)) {
+		error = do_execve(path, argv, envp, current_pt_regs());
+		putname(path);
+	}
+	return error;
+}
+#ifdef CONFIG_COMPAT
+asmlinkage long compat_sys_execve(const char __user * filename,
+	const compat_uptr_t __user * argv,
+	const compat_uptr_t __user * envp)
+{
+	const char *path = getname(filename);
+	int error = PTR_ERR(path);
+	if (!IS_ERR(path)) {
+		error = compat_do_execve(path, argv, envp, current_pt_regs());
+		putname(path);
+	}
+	return error;
+}
+#endif
+#endif
+
+#ifdef __ARCH_WANT_KERNEL_EXECVE
+int kernel_execve(const char *filename,
+		  const char *const argv[],
+		  const char *const envp[])
+{
+	struct pt_regs *p = current_pt_regs();
+	int ret;
+
+	ret = do_execve(filename,
+			(const char __user *const __user *)argv,
+			(const char __user *const __user *)envp, p);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * We were successful.  We won't be returning to our caller, but
+	 * instead to user space by manipulating the kernel stack.
+	 */
+	ret_from_kernel_execve(p);
+}
+#endif
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 37935c2..26531f3 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -19,6 +19,8 @@
 
 #ifdef __KERNEL__
 #include <linux/sched.h>
+#include <linux/unistd.h>
+#include <asm/exec.h>
 
 #define CORENAME_MAX_SIZE 128
 
@@ -135,5 +137,9 @@
 extern void set_binfmt(struct linux_binfmt *new);
 extern void free_bprm(struct linux_binprm *);
 
+#ifdef __ARCH_WANT_KERNEL_EXECVE
+extern void ret_from_kernel_execve(struct pt_regs *normal) __noreturn;
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_BINFMTS_H */
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 3f53d00..d0ced10 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -284,8 +284,12 @@
 		const struct compat_iovec __user *vec,
 		unsigned long vlen, u32 pos_low, u32 pos_high);
 
-int compat_do_execve(char *filename, compat_uptr_t __user *argv,
-		     compat_uptr_t __user *envp, struct pt_regs *regs);
+int compat_do_execve(const char *filename, const compat_uptr_t __user *argv,
+		     const compat_uptr_t __user *envp, struct pt_regs *regs);
+#ifdef __ARCH_WANT_SYS_EXECVE
+asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv,
+		     const compat_uptr_t __user *envp);
+#endif
 
 asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
 		compat_ulong_t __user *outp, compat_ulong_t __user *exp,
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 3db698a..1d24ffa 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -401,6 +401,10 @@
 #define arch_ptrace_stop(code, info)		do { } while (0)
 #endif
 
+#ifndef current_pt_regs
+#define current_pt_regs() task_pt_regs(current)
+#endif
+
 extern int task_current_syscall(struct task_struct *target, long *callno,
 				unsigned long args[6], unsigned int maxargs,
 				unsigned long *sp, unsigned long *pc);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c2070e9..a83ca58 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2332,6 +2332,9 @@
 		     const char __user * const __user *, struct pt_regs *);
 extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
 struct task_struct *fork_idle(int);
+#ifdef CONFIG_GENERIC_KERNEL_THREAD
+extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
+#endif
 
 extern void set_task_comm(struct task_struct *tsk, char *from);
 extern char *get_task_comm(char *to, struct task_struct *tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index 1cd7d581..8b20ab7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1584,7 +1584,7 @@
 	 * requested, no event is reported; otherwise, report if the event
 	 * for the type of forking is enabled.
 	 */
-	if (likely(user_mode(regs)) && !(clone_flags & CLONE_UNTRACED)) {
+	if (!(clone_flags & CLONE_UNTRACED) && likely(user_mode(regs))) {
 		if (clone_flags & CLONE_VFORK)
 			trace = PTRACE_EVENT_VFORK;
 		else if ((clone_flags & CSIGNAL) != SIGCHLD)
@@ -1634,6 +1634,17 @@
 	return nr;
 }
 
+#ifdef CONFIG_GENERIC_KERNEL_THREAD
+/*
+ * Create a kernel thread.
+ */
+pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
+{
+	return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, NULL,
+		(unsigned long)arg, NULL, NULL);
+}
+#endif
+
 #ifndef ARCH_MIN_MMSTRUCT_ALIGN
 #define ARCH_MIN_MMSTRUCT_ALIGN 0
 #endif