x86: split ret_from_fork
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1a..d93eb9d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -97,6 +97,7 @@
select KTIME_SCALAR if X86_32
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
+ select GENERIC_KERNEL_THREAD
config INSTRUCTION_DECODER
def_bool (KPROBES || PERF_EVENTS || UPROBES)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index d048cad..078f3fd 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -589,11 +589,6 @@
} mm_segment_t;
-/*
- * create a kernel thread without removing it from tasklists
- */
-extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 623f288..ac11073 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -994,15 +994,20 @@
*/
.popsection
-ENTRY(kernel_thread_helper)
- pushl $0 # fake return address for unwinder
+ENTRY(ret_from_kernel_thread)
CFI_STARTPROC
- movl %edi,%eax
- call *%esi
+ pushl_cfi %eax
+ call schedule_tail
+ GET_THREAD_INFO(%ebp)
+ popl_cfi %eax
+ pushl_cfi $0x0202 # Reset kernel eflags
+ popfl_cfi
+ movl PT_EBP(%esp),%eax
+ call *PT_EBX(%esp)
call do_exit
ud2 # padding for call trace
CFI_ENDPROC
-ENDPROC(kernel_thread_helper)
+ENDPROC(ret_from_kernel_thread)
#ifdef CONFIG_XEN
/* Xen doesn't set %esp to be precisely what the normal sysenter
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 69babd8..5526d17 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -450,7 +450,7 @@
RESTORE_REST
testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
- jz retint_restore_args
+ jz 1f
testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
jnz int_ret_from_sys_call
@@ -458,6 +458,16 @@
RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
jmp ret_from_sys_call # go to the SYSRET fastpath
+1:
+ subq $REST_SKIP, %rsp # move the stack pointer back
+ CFI_ADJUST_CFA_OFFSET REST_SKIP
+ movq %rbp, %rdi
+ call *%rbx
+ # exit
+ mov %eax, %edi
+ call do_exit
+ ud2 # padding for call trace
+
CFI_ENDPROC
END(ret_from_fork)
@@ -1206,21 +1216,6 @@
jmp 2b
.previous
-ENTRY(kernel_thread_helper)
- pushq $0 # fake return address
- CFI_STARTPROC
- /*
- * Here we are in the child and the registers are set as they were
- * at kernel_thread() invocation in the parent.
- */
- call *%rsi
- # exit
- mov %eax, %edi
- call do_exit
- ud2 # padding for call trace
- CFI_ENDPROC
-END(kernel_thread_helper)
-
/*
* execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
*
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 7162e9c..6947ec9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -299,44 +299,6 @@
}
/*
- * This gets run with %si containing the
- * function to call, and %di containing
- * the "args".
- */
-extern void kernel_thread_helper(void);
-
-/*
- * Create a kernel thread
- */
-int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
-{
- struct pt_regs regs;
-
- memset(®s, 0, sizeof(regs));
-
- regs.si = (unsigned long) fn;
- regs.di = (unsigned long) arg;
-
-#ifdef CONFIG_X86_32
- regs.ds = __USER_DS;
- regs.es = __USER_DS;
- regs.fs = __KERNEL_PERCPU;
- regs.gs = __KERNEL_STACK_CANARY;
-#else
- regs.ss = __KERNEL_DS;
-#endif
-
- regs.orig_ax = -1;
- regs.ip = (unsigned long) kernel_thread_helper;
- regs.cs = __KERNEL_CS | get_kernel_rpl();
- regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
-
- /* Ok, create the new process.. */
- return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
-/*
* sys_execve() executes a new program.
*/
long sys_execve(const char __user *name,
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 75fcad1..c993987 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -57,6 +57,7 @@
#include <asm/switch_to.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
/*
* Return saved PC of a blocked thread.
@@ -127,23 +128,39 @@
}
int copy_thread(unsigned long clone_flags, unsigned long sp,
- unsigned long unused,
+ unsigned long arg,
struct task_struct *p, struct pt_regs *regs)
{
- struct pt_regs *childregs;
+ struct pt_regs *childregs = task_pt_regs(p);
struct task_struct *tsk;
int err;
- childregs = task_pt_regs(p);
- *childregs = *regs;
- childregs->ax = 0;
- childregs->sp = sp;
-
p->thread.sp = (unsigned long) childregs;
p->thread.sp0 = (unsigned long) (childregs+1);
- p->thread.ip = (unsigned long) ret_from_fork;
+ if (unlikely(!regs)) {
+ /* kernel thread */
+ memset(childregs, 0, sizeof(struct pt_regs));
+ p->thread.ip = (unsigned long) ret_from_kernel_thread;
+ task_user_gs(p) = __KERNEL_STACK_CANARY;
+ childregs->ds = __USER_DS;
+ childregs->es = __USER_DS;
+ childregs->fs = __KERNEL_PERCPU;
+ childregs->bx = sp; /* function */
+ childregs->bp = arg;
+ childregs->orig_ax = -1;
+ childregs->cs = __KERNEL_CS | get_kernel_rpl();
+ childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+ p->fpu_counter = 0;
+ p->thread.io_bitmap_ptr = NULL;
+ memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+ return 0;
+ }
+ *childregs = *regs;
+ childregs->ax = 0;
+ childregs->sp = sp;
+ p->thread.ip = (unsigned long) ret_from_fork;
task_user_gs(p) = get_user_gs(regs);
p->fpu_counter = 0;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 0a980c9..937f2af 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -146,29 +146,18 @@
}
int copy_thread(unsigned long clone_flags, unsigned long sp,
- unsigned long unused,
+ unsigned long arg,
struct task_struct *p, struct pt_regs *regs)
{
int err;
struct pt_regs *childregs;
struct task_struct *me = current;
- childregs = ((struct pt_regs *)
- (THREAD_SIZE + task_stack_page(p))) - 1;
- *childregs = *regs;
-
- childregs->ax = 0;
- if (user_mode(regs))
- childregs->sp = sp;
- else
- childregs->sp = (unsigned long)childregs;
-
+ p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
+ childregs = task_pt_regs(p);
p->thread.sp = (unsigned long) childregs;
- p->thread.sp0 = (unsigned long) (childregs+1);
p->thread.usersp = me->thread.usersp;
-
set_tsk_thread_flag(p, TIF_FORK);
-
p->fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;
@@ -178,6 +167,24 @@
p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
+ memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
+ if (unlikely(!regs)) {
+ /* kernel thread */
+ memset(childregs, 0, sizeof(struct pt_regs));
+ childregs->sp = (unsigned long)childregs;
+ childregs->ss = __KERNEL_DS;
+ childregs->bx = sp; /* function */
+ childregs->bp = arg;
+ childregs->orig_ax = -1;
+ childregs->cs = __KERNEL_CS | get_kernel_rpl();
+ childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+ return 0;
+ }
+ *childregs = *regs;
+
+ childregs->ax = 0;
+ childregs->sp = sp;
err = -ENOMEM;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));