/*
 *  linux/arch/x86_64/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *
 *  $Id$
 */
10
/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: like partial stack frame, but all registers saved.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */
29
30#define ASSEMBLY 1
31#include <linux/config.h>
32#include <linux/linkage.h>
33#include <asm/segment.h>
34#include <asm/smp.h>
35#include <asm/cache.h>
36#include <asm/errno.h>
37#include <asm/dwarf2.h>
38#include <asm/calling.h>
39#include <asm/offset.h>
40#include <asm/msr.h>
41#include <asm/unistd.h>
42#include <asm/thread_info.h>
43#include <asm/hw_irq.h>
44
45 .code64
46
Andi Kleendc37db42005-04-16 15:25:05 -070047#ifndef CONFIG_PREEMPT
Linus Torvalds1da177e2005-04-16 15:20:36 -070048#define retint_kernel retint_restore_args
49#endif
50
51/*
52 * C code is not supposed to know about undefined top of stack. Every time
53 * a C function with an pt_regs argument is called from the SYSCALL based
54 * fast path FIXUP_TOP_OF_STACK is needed.
55 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
56 * manipulation.
57 */
58
59 /* %rsp:at FRAMEEND */
60 .macro FIXUP_TOP_OF_STACK tmp
61 movq %gs:pda_oldrsp,\tmp
62 movq \tmp,RSP(%rsp)
63 movq $__USER_DS,SS(%rsp)
64 movq $__USER_CS,CS(%rsp)
65 movq $-1,RCX(%rsp)
66 movq R11(%rsp),\tmp /* get eflags */
67 movq \tmp,EFLAGS(%rsp)
68 .endm
69
70 .macro RESTORE_TOP_OF_STACK tmp,offset=0
71 movq RSP-\offset(%rsp),\tmp
72 movq \tmp,%gs:pda_oldrsp
73 movq EFLAGS-\offset(%rsp),\tmp
74 movq \tmp,R11-\offset(%rsp)
75 .endm
76
77 .macro FAKE_STACK_FRAME child_rip
78 /* push in order ss, rsp, eflags, cs, rip */
79 xorq %rax, %rax
80 pushq %rax /* ss */
81 CFI_ADJUST_CFA_OFFSET 8
82 pushq %rax /* rsp */
83 CFI_ADJUST_CFA_OFFSET 8
84 CFI_OFFSET rip,0
85 pushq $(1<<9) /* eflags - interrupts on */
86 CFI_ADJUST_CFA_OFFSET 8
87 pushq $__KERNEL_CS /* cs */
88 CFI_ADJUST_CFA_OFFSET 8
89 pushq \child_rip /* rip */
90 CFI_ADJUST_CFA_OFFSET 8
91 CFI_OFFSET rip,0
92 pushq %rax /* orig rax */
93 CFI_ADJUST_CFA_OFFSET 8
94 .endm
95
96 .macro UNFAKE_STACK_FRAME
97 addq $8*6, %rsp
98 CFI_ADJUST_CFA_OFFSET -(6*8)
99 .endm
100
101 .macro CFI_DEFAULT_STACK
102 CFI_ADJUST_CFA_OFFSET (SS)
103 CFI_OFFSET r15,R15-SS
104 CFI_OFFSET r14,R14-SS
105 CFI_OFFSET r13,R13-SS
106 CFI_OFFSET r12,R12-SS
107 CFI_OFFSET rbp,RBP-SS
108 CFI_OFFSET rbx,RBX-SS
109 CFI_OFFSET r11,R11-SS
110 CFI_OFFSET r10,R10-SS
111 CFI_OFFSET r9,R9-SS
112 CFI_OFFSET r8,R8-SS
113 CFI_OFFSET rax,RAX-SS
114 CFI_OFFSET rcx,RCX-SS
115 CFI_OFFSET rdx,RDX-SS
116 CFI_OFFSET rsi,RSI-SS
117 CFI_OFFSET rdi,RDI-SS
118 CFI_OFFSET rsp,RSP-SS
119 CFI_OFFSET rip,RIP-SS
120 .endm
121/*
122 * A newly forked process directly context switches into this.
123 */
124/* rdi: prev */
125ENTRY(ret_from_fork)
126 CFI_STARTPROC
127 CFI_DEFAULT_STACK
128 call schedule_tail
129 GET_THREAD_INFO(%rcx)
130 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
131 jnz rff_trace
132rff_action:
133 RESTORE_REST
134 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
135 je int_ret_from_sys_call
136 testl $_TIF_IA32,threadinfo_flags(%rcx)
137 jnz int_ret_from_sys_call
138 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
139 jmp ret_from_sys_call
140rff_trace:
141 movq %rsp,%rdi
142 call syscall_trace_leave
143 GET_THREAD_INFO(%rcx)
144 jmp rff_action
145 CFI_ENDPROC
146
147/*
148 * System call entry. Upto 6 arguments in registers are supported.
149 *
150 * SYSCALL does not save anything on the stack and does not change the
151 * stack pointer.
152 */
153
154/*
155 * Register setup:
156 * rax system call number
157 * rdi arg0
158 * rcx return address for syscall/sysret, C arg3
159 * rsi arg1
160 * rdx arg2
161 * r10 arg3 (--> moved to rcx for C)
162 * r8 arg4
163 * r9 arg5
164 * r11 eflags for syscall/sysret, temporary for C
165 * r12-r15,rbp,rbx saved by C code, not touched.
166 *
167 * Interrupts are off on entry.
168 * Only called from user space.
169 *
170 * XXX if we had a free scratch register we could save the RSP into the stack frame
171 * and report it properly in ps. Unfortunately we haven't.
172 */
173
174ENTRY(system_call)
175 CFI_STARTPROC
176 swapgs
177 movq %rsp,%gs:pda_oldrsp
178 movq %gs:pda_kernelstack,%rsp
179 sti
180 SAVE_ARGS 8,1
181 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
182 movq %rcx,RIP-ARGOFFSET(%rsp)
183 GET_THREAD_INFO(%rcx)
184 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
185 jnz tracesys
186 cmpq $__NR_syscall_max,%rax
187 ja badsys
188 movq %r10,%rcx
189 call *sys_call_table(,%rax,8) # XXX: rip relative
190 movq %rax,RAX-ARGOFFSET(%rsp)
191/*
192 * Syscall return path ending with SYSRET (fast path)
193 * Has incomplete stack frame and undefined top of stack.
194 */
195 .globl ret_from_sys_call
196ret_from_sys_call:
Andi Kleen11b854b2005-04-16 15:25:02 -0700197 movl $_TIF_ALLWORK_MASK,%edi
Linus Torvalds1da177e2005-04-16 15:20:36 -0700198 /* edi: flagmask */
199sysret_check:
200 GET_THREAD_INFO(%rcx)
201 cli
202 movl threadinfo_flags(%rcx),%edx
203 andl %edi,%edx
204 jnz sysret_careful
205 movq RIP-ARGOFFSET(%rsp),%rcx
206 RESTORE_ARGS 0,-ARG_SKIP,1
207 movq %gs:pda_oldrsp,%rsp
208 swapgs
209 sysretq
210
211 /* Handle reschedules */
212 /* edx: work, edi: workmask */
213sysret_careful:
214 bt $TIF_NEED_RESCHED,%edx
215 jnc sysret_signal
216 sti
217 pushq %rdi
218 call schedule
219 popq %rdi
220 jmp sysret_check
221
222 /* Handle a signal */
Andi Kleen11b854b2005-04-16 15:25:02 -0700223 /* edx: work flags (arg3) */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700224sysret_signal:
225 sti
Linus Torvalds1da177e2005-04-16 15:20:36 -0700226 leaq do_notify_resume(%rip),%rax
227 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
228 xorl %esi,%esi # oldset -> arg2
229 call ptregscall_common
Linus Torvalds1da177e2005-04-16 15:20:36 -0700230 jmp sysret_check
231
232 /* Do syscall tracing */
233tracesys:
234 SAVE_REST
235 movq $-ENOSYS,RAX(%rsp)
236 FIXUP_TOP_OF_STACK %rdi
237 movq %rsp,%rdi
238 call syscall_trace_enter
239 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
240 RESTORE_REST
241 cmpq $__NR_syscall_max,%rax
242 ja 1f
243 movq %r10,%rcx /* fixup for C */
244 call *sys_call_table(,%rax,8)
245 movq %rax,RAX-ARGOFFSET(%rsp)
2461: SAVE_REST
247 movq %rsp,%rdi
248 call syscall_trace_leave
249 RESTORE_TOP_OF_STACK %rbx
250 RESTORE_REST
251 jmp ret_from_sys_call
252
253badsys:
254 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
255 jmp ret_from_sys_call
256
257/*
258 * Syscall return path ending with IRET.
259 * Has correct top of stack, but partial stack frame.
260 */
261ENTRY(int_ret_from_sys_call)
262 cli
263 testl $3,CS-ARGOFFSET(%rsp)
264 je retint_restore_args
265 movl $_TIF_ALLWORK_MASK,%edi
266 /* edi: mask to check */
267int_with_check:
268 GET_THREAD_INFO(%rcx)
269 movl threadinfo_flags(%rcx),%edx
270 andl %edi,%edx
271 jnz int_careful
272 jmp retint_swapgs
273
274 /* Either reschedule or signal or syscall exit tracking needed. */
275 /* First do a reschedule test. */
276 /* edx: work, edi: workmask */
277int_careful:
278 bt $TIF_NEED_RESCHED,%edx
279 jnc int_very_careful
280 sti
281 pushq %rdi
282 call schedule
283 popq %rdi
Andi Kleencdd219c2005-04-16 15:25:04 -0700284 cli
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285 jmp int_with_check
286
287 /* handle signals and tracing -- both require a full stack frame */
288int_very_careful:
289 sti
290 SAVE_REST
291 /* Check for syscall exit trace */
292 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
293 jz int_signal
294 pushq %rdi
295 leaq 8(%rsp),%rdi # &ptregs -> arg1
296 call syscall_trace_leave
297 popq %rdi
Andi Kleen36c11042005-04-16 15:25:01 -0700298 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
Andi Kleenbe9e6872005-05-01 08:58:51 -0700299 cli
Linus Torvalds1da177e2005-04-16 15:20:36 -0700300 jmp int_restore_rest
301
302int_signal:
303 testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
304 jz 1f
305 movq %rsp,%rdi # &ptregs -> arg1
306 xorl %esi,%esi # oldset -> arg2
307 call do_notify_resume
3081: movl $_TIF_NEED_RESCHED,%edi
309int_restore_rest:
310 RESTORE_REST
Andi Kleenbe9e6872005-05-01 08:58:51 -0700311 cli
Linus Torvalds1da177e2005-04-16 15:20:36 -0700312 jmp int_with_check
313 CFI_ENDPROC
314
315/*
316 * Certain special system calls that need to save a complete full stack frame.
317 */
318
319 .macro PTREGSCALL label,func,arg
320 .globl \label
321\label:
322 leaq \func(%rip),%rax
323 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
324 jmp ptregscall_common
325 .endm
326
327 PTREGSCALL stub_clone, sys_clone, %r8
328 PTREGSCALL stub_fork, sys_fork, %rdi
329 PTREGSCALL stub_vfork, sys_vfork, %rdi
330 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
331 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
332 PTREGSCALL stub_iopl, sys_iopl, %rsi
333
334ENTRY(ptregscall_common)
335 CFI_STARTPROC
336 popq %r11
337 CFI_ADJUST_CFA_OFFSET -8
338 SAVE_REST
339 movq %r11, %r15
340 FIXUP_TOP_OF_STACK %r11
341 call *%rax
342 RESTORE_TOP_OF_STACK %r11
343 movq %r15, %r11
344 RESTORE_REST
345 pushq %r11
346 CFI_ADJUST_CFA_OFFSET 8
347 ret
348 CFI_ENDPROC
349
350ENTRY(stub_execve)
351 CFI_STARTPROC
352 popq %r11
353 CFI_ADJUST_CFA_OFFSET -8
354 SAVE_REST
355 movq %r11, %r15
356 FIXUP_TOP_OF_STACK %r11
357 call sys_execve
358 GET_THREAD_INFO(%rcx)
359 bt $TIF_IA32,threadinfo_flags(%rcx)
360 jc exec_32bit
361 RESTORE_TOP_OF_STACK %r11
362 movq %r15, %r11
363 RESTORE_REST
364 push %r11
365 ret
366
367exec_32bit:
368 CFI_ADJUST_CFA_OFFSET REST_SKIP
369 movq %rax,RAX(%rsp)
370 RESTORE_REST
371 jmp int_ret_from_sys_call
372 CFI_ENDPROC
373
374/*
375 * sigreturn is special because it needs to restore all registers on return.
376 * This cannot be done with SYSRET, so use the IRET return path instead.
377 */
378ENTRY(stub_rt_sigreturn)
379 CFI_STARTPROC
380 addq $8, %rsp
381 SAVE_REST
382 movq %rsp,%rdi
383 FIXUP_TOP_OF_STACK %r11
384 call sys_rt_sigreturn
385 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
386 RESTORE_REST
387 jmp int_ret_from_sys_call
388 CFI_ENDPROC
389
390/*
391 * Interrupt entry/exit.
392 *
393 * Interrupt entry points save only callee clobbered registers in fast path.
394 *
395 * Entry runs with interrupts off.
396 */
397
398/* 0(%rsp): interrupt number */
399 .macro interrupt func
400 CFI_STARTPROC simple
401 CFI_DEF_CFA rsp,(SS-RDI)
402 CFI_REL_OFFSET rsp,(RSP-ORIG_RAX)
403 CFI_REL_OFFSET rip,(RIP-ORIG_RAX)
404 cld
405#ifdef CONFIG_DEBUG_INFO
406 SAVE_ALL
407 movq %rsp,%rdi
408 /*
409 * Setup a stack frame pointer. This allows gdb to trace
410 * back to the original stack.
411 */
412 movq %rsp,%rbp
413 CFI_DEF_CFA_REGISTER rbp
414#else
415 SAVE_ARGS
416 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
417#endif
418 testl $3,CS(%rdi)
419 je 1f
420 swapgs
4211: addl $1,%gs:pda_irqcount # RED-PEN should check preempt count
422 movq %gs:pda_irqstackptr,%rax
423 cmoveq %rax,%rsp
424 pushq %rdi # save old stack
425 call \func
426 .endm
427
428ENTRY(common_interrupt)
429 interrupt do_IRQ
430 /* 0(%rsp): oldrsp-ARGOFFSET */
431ret_from_intr:
432 popq %rdi
433 cli
434 subl $1,%gs:pda_irqcount
435#ifdef CONFIG_DEBUG_INFO
436 movq RBP(%rdi),%rbp
437#endif
438 leaq ARGOFFSET(%rdi),%rsp
439exit_intr:
440 GET_THREAD_INFO(%rcx)
441 testl $3,CS-ARGOFFSET(%rsp)
442 je retint_kernel
443
444 /* Interrupt came from user space */
445 /*
446 * Has a correct top of stack, but a partial stack frame
447 * %rcx: thread info. Interrupts off.
448 */
449retint_with_reschedule:
450 movl $_TIF_WORK_MASK,%edi
451retint_check:
452 movl threadinfo_flags(%rcx),%edx
453 andl %edi,%edx
454 jnz retint_careful
455retint_swapgs:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456 swapgs
457retint_restore_args:
458 cli
459 RESTORE_ARGS 0,8,0
460iret_label:
461 iretq
462
463 .section __ex_table,"a"
464 .quad iret_label,bad_iret
465 .previous
466 .section .fixup,"ax"
467 /* force a signal here? this matches i386 behaviour */
468 /* running with kernel gs */
469bad_iret:
470 movq $-9999,%rdi /* better code? */
471 jmp do_exit
472 .previous
473
474 /* edi: workmask, edx: work */
475retint_careful:
476 bt $TIF_NEED_RESCHED,%edx
477 jnc retint_signal
478 sti
479 pushq %rdi
480 call schedule
481 popq %rdi
482 GET_THREAD_INFO(%rcx)
483 cli
484 jmp retint_check
485
486retint_signal:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700487 sti
488 SAVE_REST
489 movq $-1,ORIG_RAX(%rsp)
490 xorq %rsi,%rsi # oldset
491 movq %rsp,%rdi # &pt_regs
492 call do_notify_resume
493 RESTORE_REST
494 cli
Andi Kleenbe9e6872005-05-01 08:58:51 -0700495 GET_THREAD_INFO(%rcx)
496 movl $_TIF_WORK_MASK,%edi
Linus Torvalds1da177e2005-04-16 15:20:36 -0700497 jmp retint_check
498
499#ifdef CONFIG_PREEMPT
500 /* Returning to kernel space. Check if we need preemption */
501 /* rcx: threadinfo. interrupts off. */
502 .p2align
503retint_kernel:
504 cmpl $0,threadinfo_preempt_count(%rcx)
505 jnz retint_restore_args
506 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
507 jnc retint_restore_args
508 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
509 jnc retint_restore_args
510 call preempt_schedule_irq
511 jmp exit_intr
512#endif
513 CFI_ENDPROC
514
515/*
516 * APIC interrupts.
517 */
518 .macro apicinterrupt num,func
519 pushq $\num-256
520 interrupt \func
521 jmp ret_from_intr
522 CFI_ENDPROC
523 .endm
524
525ENTRY(thermal_interrupt)
526 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
527
528#ifdef CONFIG_SMP
529ENTRY(reschedule_interrupt)
530 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
531
532ENTRY(invalidate_interrupt)
533 apicinterrupt INVALIDATE_TLB_VECTOR,smp_invalidate_interrupt
534
535ENTRY(call_function_interrupt)
536 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
537#endif
538
539#ifdef CONFIG_X86_LOCAL_APIC
540ENTRY(apic_timer_interrupt)
541 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
542
543ENTRY(error_interrupt)
544 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
545
546ENTRY(spurious_interrupt)
547 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
548#endif
549
550/*
551 * Exception entry points.
552 */
553 .macro zeroentry sym
554 pushq $0 /* push error code/oldrax */
555 pushq %rax /* push real oldrax to the rdi slot */
556 leaq \sym(%rip),%rax
557 jmp error_entry
558 .endm
559
560 .macro errorentry sym
561 pushq %rax
562 leaq \sym(%rip),%rax
563 jmp error_entry
564 .endm
565
566 /* error code is on the stack already */
567 /* handle NMI like exceptions that can happen everywhere */
568 .macro paranoidentry sym
569 SAVE_ALL
570 cld
571 movl $1,%ebx
572 movl $MSR_GS_BASE,%ecx
573 rdmsr
574 testl %edx,%edx
575 js 1f
576 swapgs
577 xorl %ebx,%ebx
5781: movq %rsp,%rdi
579 movq ORIG_RAX(%rsp),%rsi
580 movq $-1,ORIG_RAX(%rsp)
581 call \sym
Andi Kleen6fefb0d2005-04-16 15:25:03 -0700582 cli
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583 .endm
584
585/*
586 * Exception entry point. This expects an error code/orig_rax on the stack
587 * and the exception handler in %rax.
588 */
589ENTRY(error_entry)
590 CFI_STARTPROC simple
591 CFI_DEF_CFA rsp,(SS-RDI)
592 CFI_REL_OFFSET rsp,(RSP-RDI)
593 CFI_REL_OFFSET rip,(RIP-RDI)
594 /* rdi slot contains rax, oldrax contains error code */
595 cld
596 subq $14*8,%rsp
597 CFI_ADJUST_CFA_OFFSET (14*8)
598 movq %rsi,13*8(%rsp)
599 CFI_REL_OFFSET rsi,RSI
600 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
601 movq %rdx,12*8(%rsp)
602 CFI_REL_OFFSET rdx,RDX
603 movq %rcx,11*8(%rsp)
604 CFI_REL_OFFSET rcx,RCX
605 movq %rsi,10*8(%rsp) /* store rax */
606 CFI_REL_OFFSET rax,RAX
607 movq %r8, 9*8(%rsp)
608 CFI_REL_OFFSET r8,R8
609 movq %r9, 8*8(%rsp)
610 CFI_REL_OFFSET r9,R9
611 movq %r10,7*8(%rsp)
612 CFI_REL_OFFSET r10,R10
613 movq %r11,6*8(%rsp)
614 CFI_REL_OFFSET r11,R11
615 movq %rbx,5*8(%rsp)
616 CFI_REL_OFFSET rbx,RBX
617 movq %rbp,4*8(%rsp)
618 CFI_REL_OFFSET rbp,RBP
619 movq %r12,3*8(%rsp)
620 CFI_REL_OFFSET r12,R12
621 movq %r13,2*8(%rsp)
622 CFI_REL_OFFSET r13,R13
623 movq %r14,1*8(%rsp)
624 CFI_REL_OFFSET r14,R14
625 movq %r15,(%rsp)
626 CFI_REL_OFFSET r15,R15
627 xorl %ebx,%ebx
628 testl $3,CS(%rsp)
629 je error_kernelspace
630error_swapgs:
631 swapgs
632error_sti:
633 movq %rdi,RDI(%rsp)
634 movq %rsp,%rdi
635 movq ORIG_RAX(%rsp),%rsi /* get error code */
636 movq $-1,ORIG_RAX(%rsp)
637 call *%rax
638 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
639error_exit:
640 movl %ebx,%eax
641 RESTORE_REST
642 cli
643 GET_THREAD_INFO(%rcx)
644 testl %eax,%eax
645 jne retint_kernel
646 movl threadinfo_flags(%rcx),%edx
647 movl $_TIF_WORK_MASK,%edi
648 andl %edi,%edx
649 jnz retint_careful
650 swapgs
651 RESTORE_ARGS 0,8,0
652 iretq
653 CFI_ENDPROC
654
655error_kernelspace:
656 incl %ebx
657 /* There are two places in the kernel that can potentially fault with
658 usergs. Handle them here. The exception handlers after
659 iret run with kernel gs again, so don't set the user space flag.
660 B stepping K8s sometimes report an truncated RIP for IRET
661 exceptions returning to compat mode. Check for these here too. */
662 leaq iret_label(%rip),%rbp
663 cmpq %rbp,RIP(%rsp)
664 je error_swapgs
665 movl %ebp,%ebp /* zero extend */
666 cmpq %rbp,RIP(%rsp)
667 je error_swapgs
668 cmpq $gs_change,RIP(%rsp)
669 je error_swapgs
670 jmp error_sti
671
672 /* Reload gs selector with exception handling */
673 /* edi: new selector */
674ENTRY(load_gs_index)
675 pushf
676 cli
677 swapgs
678gs_change:
679 movl %edi,%gs
6802: mfence /* workaround */
681 swapgs
682 popf
683 ret
684
685 .section __ex_table,"a"
686 .align 8
687 .quad gs_change,bad_gs
688 .previous
689 .section .fixup,"ax"
690 /* running with kernelgs */
691bad_gs:
692 swapgs /* switch back to user gs */
693 xorl %eax,%eax
694 movl %eax,%gs
695 jmp 2b
696 .previous
697
698/*
699 * Create a kernel thread.
700 *
701 * C extern interface:
702 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
703 *
704 * asm input arguments:
705 * rdi: fn, rsi: arg, rdx: flags
706 */
707ENTRY(kernel_thread)
708 CFI_STARTPROC
709 FAKE_STACK_FRAME $child_rip
710 SAVE_ALL
711
712 # rdi: flags, rsi: usp, rdx: will be &pt_regs
713 movq %rdx,%rdi
714 orq kernel_thread_flags(%rip),%rdi
715 movq $-1, %rsi
716 movq %rsp, %rdx
717
718 xorl %r8d,%r8d
719 xorl %r9d,%r9d
720
721 # clone now
722 call do_fork
723 movq %rax,RAX(%rsp)
724 xorl %edi,%edi
725
726 /*
727 * It isn't worth to check for reschedule here,
728 * so internally to the x86_64 port you can rely on kernel_thread()
729 * not to reschedule the child before returning, this avoids the need
730 * of hacks for example to fork off the per-CPU idle tasks.
731 * [Hopefully no generic code relies on the reschedule -AK]
732 */
733 RESTORE_ALL
734 UNFAKE_STACK_FRAME
735 ret
736 CFI_ENDPROC
737
738
739child_rip:
740 /*
741 * Here we are in the child and the registers are set as they were
742 * at kernel_thread() invocation in the parent.
743 */
744 movq %rdi, %rax
745 movq %rsi, %rdi
746 call *%rax
747 # exit
748 xorq %rdi, %rdi
749 call do_exit
750
751/*
752 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
753 *
754 * C extern interface:
755 * extern long execve(char *name, char **argv, char **envp)
756 *
757 * asm input arguments:
758 * rdi: name, rsi: argv, rdx: envp
759 *
760 * We want to fallback into:
761 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
762 *
763 * do_sys_execve asm fallback arguments:
764 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
765 */
766ENTRY(execve)
767 CFI_STARTPROC
768 FAKE_STACK_FRAME $0
769 SAVE_ALL
770 call sys_execve
771 movq %rax, RAX(%rsp)
772 RESTORE_REST
773 testq %rax,%rax
774 je int_ret_from_sys_call
775 RESTORE_ARGS
776 UNFAKE_STACK_FRAME
777 ret
778 CFI_ENDPROC
779
780ENTRY(page_fault)
781 errorentry do_page_fault
782
783ENTRY(coprocessor_error)
784 zeroentry do_coprocessor_error
785
786ENTRY(simd_coprocessor_error)
787 zeroentry do_simd_coprocessor_error
788
789ENTRY(device_not_available)
790 zeroentry math_state_restore
791
792 /* runs on exception stack */
793ENTRY(debug)
794 CFI_STARTPROC
795 pushq $0
796 CFI_ADJUST_CFA_OFFSET 8
797 paranoidentry do_debug
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798 jmp paranoid_exit
799 CFI_ENDPROC
800
801 /* runs on exception stack */
802ENTRY(nmi)
803 CFI_STARTPROC
804 pushq $-1
805 CFI_ADJUST_CFA_OFFSET 8
806 paranoidentry do_nmi
Andi Kleen6fefb0d2005-04-16 15:25:03 -0700807 /*
808 * "Paranoid" exit path from exception stack.
809 * Paranoid because this is used by NMIs and cannot take
810 * any kernel state for granted.
811 * We don't do kernel preemption checks here, because only
812 * NMI should be common and it does not enable IRQs and
813 * cannot get reschedule ticks.
814 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815 /* ebx: no swapgs flag */
816paranoid_exit:
817 testl %ebx,%ebx /* swapgs needed? */
818 jnz paranoid_restore
Andi Kleen6fefb0d2005-04-16 15:25:03 -0700819 testl $3,CS(%rsp)
820 jnz paranoid_userspace
Linus Torvalds1da177e2005-04-16 15:20:36 -0700821paranoid_swapgs:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822 swapgs
823paranoid_restore:
824 RESTORE_ALL 8
825 iretq
826paranoid_userspace:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827 GET_THREAD_INFO(%rcx)
Andi Kleen6fefb0d2005-04-16 15:25:03 -0700828 movl threadinfo_flags(%rcx),%ebx
829 andl $_TIF_WORK_MASK,%ebx
Andi Kleen11b854b2005-04-16 15:25:02 -0700830 jz paranoid_swapgs
Andi Kleen6fefb0d2005-04-16 15:25:03 -0700831 movq %rsp,%rdi /* &pt_regs */
832 call sync_regs
833 movq %rax,%rsp /* switch stack for scheduling */
834 testl $_TIF_NEED_RESCHED,%ebx
835 jnz paranoid_schedule
836 movl %ebx,%edx /* arg3: thread flags */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700837 sti
Andi Kleen6fefb0d2005-04-16 15:25:03 -0700838 xorl %esi,%esi /* arg2: oldset */
839 movq %rsp,%rdi /* arg1: &pt_regs */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840 call do_notify_resume
Andi Kleen6fefb0d2005-04-16 15:25:03 -0700841 cli
842 jmp paranoid_userspace
843paranoid_schedule:
Andi Kleen11b854b2005-04-16 15:25:02 -0700844 sti
845 call schedule
Andi Kleen6fefb0d2005-04-16 15:25:03 -0700846 cli
847 jmp paranoid_userspace
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848 CFI_ENDPROC
Andi Kleen6fefb0d2005-04-16 15:25:03 -0700849
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850ENTRY(int3)
851 zeroentry do_int3
852
853ENTRY(overflow)
854 zeroentry do_overflow
855
856ENTRY(bounds)
857 zeroentry do_bounds
858
859ENTRY(invalid_op)
860 zeroentry do_invalid_op
861
862ENTRY(coprocessor_segment_overrun)
863 zeroentry do_coprocessor_segment_overrun
864
865ENTRY(reserved)
866 zeroentry do_reserved
867
868 /* runs on exception stack */
869ENTRY(double_fault)
870 CFI_STARTPROC
871 paranoidentry do_double_fault
Linus Torvalds1da177e2005-04-16 15:20:36 -0700872 jmp paranoid_exit
873 CFI_ENDPROC
874
875ENTRY(invalid_TSS)
876 errorentry do_invalid_TSS
877
878ENTRY(segment_not_present)
879 errorentry do_segment_not_present
880
881 /* runs on exception stack */
882ENTRY(stack_segment)
883 CFI_STARTPROC
884 paranoidentry do_stack_segment
Linus Torvalds1da177e2005-04-16 15:20:36 -0700885 jmp paranoid_exit
886 CFI_ENDPROC
887
888ENTRY(general_protection)
889 errorentry do_general_protection
890
891ENTRY(alignment_check)
892 errorentry do_alignment_check
893
894ENTRY(divide_error)
895 zeroentry do_divide_error
896
897ENTRY(spurious_interrupt_bug)
898 zeroentry do_spurious_interrupt_bug
899
900#ifdef CONFIG_X86_MCE
901 /* runs on exception stack */
902ENTRY(machine_check)
903 CFI_STARTPROC
904 pushq $0
905 CFI_ADJUST_CFA_OFFSET 8
906 paranoidentry do_machine_check
907 jmp paranoid_exit
908 CFI_ENDPROC
909#endif
910
911ENTRY(call_debug)
912 zeroentry do_call_debug
913