/*
 * Asm versions of Xen pv-ops, suitable for either direct use or
 * inlining.  The inline versions are the same as the direct-use
 * versions, with the pre- and post-amble chopped off.
 *
 * This code is written for size rather than absolute efficiency, with
 * a view to being able to inline as much as possible.
 *
 * We only bother with direct forms (i.e. vcpu in percpu data) of the
 * operations here (a minimal sketch of one follows the includes
 * below); the indirect forms are better handled in C, since they're
 * generally too large to inline anyway.
 */

//#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/processor-flags.h>
#include <asm/segment.h>

#include <xen/interface/xen.h>

#include "xen-asm.h"

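/*
 * Illustrative sketch only, not part of the original file: the
 * simplest "direct" form of a pv-op is a store straight into the
 * per-cpu vcpu_info event mask.  The *_sketch symbol is hypothetical;
 * the UP-only per_cpu__xen_vcpu addressing is borrowed from xen_iret
 * below.  The real direct ops live in the shared xen-asm.S and use
 * segment-relative per-cpu addressing so they clobber no registers.
 */
ENTRY(xen_irq_disable_direct_sketch)		/* hypothetical, for illustration */
	movl per_cpu__xen_vcpu, %eax		/* this cpu's vcpu_info (UP case) */
	movb $1, XEN_vcpu_info_mask(%eax)	/* mask event delivery (like cli) */
	ret
ENDPROC(xen_irq_disable_direct_sketch)
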
/*
 * Force an event check by making a hypercall, but preserve regs
 * before making the call.
 */
check_events:
	push %eax
	push %ecx
	push %edx
	call xen_force_evtchn_callback	/* C call: may clobber %eax/%ecx/%edx */
	pop %edx
	pop %ecx
	pop %eax
	ret

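/*
 * Illustrative sketch only, not part of the original file: a typical
 * caller of check_events is a direct irq-enable op, which unmasks
 * events and only drops into the hypercall if something is already
 * pending.  check_events preserves %eax/%ecx/%edx above precisely so
 * a caller like this needs no save/restore of its own.  The *_sketch
 * symbol is hypothetical; the UP-only addressing mirrors xen_iret.
 */
ENTRY(xen_irq_enable_direct_sketch)		/* hypothetical, for illustration */
	movl per_cpu__xen_vcpu, %eax		/* this cpu's vcpu_info (UP case) */
	movb $0, XEN_vcpu_info_mask(%eax)	/* unmask event delivery (like sti) */
	testb $0xff, XEN_vcpu_info_pending(%eax)
	jz 1f					/* nothing pending: done */
	call check_events			/* hypercall to deliver pending events */
1:	ret
ENDPROC(xen_irq_enable_direct_sketch)
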
/*
 * We can't use sysexit directly, because we're not running in ring0.
 * But we can easily fake it up using iret.  Assuming xen_sysexit is
 * jumped to with a standard stack frame, we can just strip it back to
 * a standard iret frame and use iret.
 */
ENTRY(xen_sysexit)
	movl PT_EAX(%esp), %eax			/* Shouldn't be necessary? */
	orl $X86_EFLAGS_IF, PT_EFLAGS(%esp)	/* native sti;sysexit re-enables
						   interrupts, so set IF in the
						   frame we hand to iret */
	lea PT_EIP(%esp), %esp			/* strip frame down to the saved eip */

	jmp xen_iret
ENDPROC(xen_sysexit)
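
/*
 * For reference, a sketch of what the "lea PT_EIP(%esp), %esp" above
 * does to the frame (slot order taken from the SAVE_ALL diagram
 * further down in this file; exact offsets assumed from asm-offsets):
 *
 *   before: esp -> ebx ... eax ds es fs orig_eax eip cs eflags (esp ss)
 *   after:                            esp -----> eip cs eflags (esp ss)
 *
 * i.e. everything below the saved eip is discarded, leaving the bare
 * iret frame that xen_iret expects.
 */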

/*
 * This is run where a normal iret would be run, with the same stack setup:
 *	   8: eflags
 *	   4: cs
 *	esp-> 0: eip
 *
 * This attempts to make sure that any pending events are dealt with
 * on return to usermode, but there is a small window in which an
 * event can happen just before entering usermode.  If the nested
 * interrupt ends up setting one of the TIF_WORK_MASK pending work
 * flags, they will not be tested again before returning to usermode.
 * This means that a process can end up with pending work, which will
 * be unprocessed until the process enters and leaves the kernel
 * again, which could be an unbounded amount of time.  This means that
 * a pending signal or reschedule event could be indefinitely delayed.
 *
 * The fix is to notice a nested interrupt in the critical window, and
 * if one occurs, then fold the nested interrupt into the current
 * interrupt stack frame, and re-process it iteratively rather than
 * recursively.  This means that it will exit via the normal path, and
 * all pending work will be dealt with appropriately.
 *
 * Because the nested interrupt handler needs to deal with the current
 * stack state in whatever form it's in, we keep things simple by only
 * using a single register which is pushed/popped on the stack.
 */
ENTRY(xen_iret)
	/* test eflags for special cases */
	testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
	jnz hyper_iret

	push %eax
	ESP_OFFSET=4	# bytes pushed onto stack

	/*
	 * Store vcpu_info pointer for easy access.  Do it this way to
	 * avoid having to reload %fs
	 */
#ifdef CONFIG_SMP
	GET_THREAD_INFO(%eax)
	movl TI_cpu(%eax), %eax			/* this cpu's number... */
	movl __per_cpu_offset(,%eax,4), %eax	/* ...its percpu base... */
	mov per_cpu__xen_vcpu(%eax), %eax	/* ...its vcpu_info pointer */
#else
	movl per_cpu__xen_vcpu, %eax
#endif

	/*
	 * Check the IF state we're restoring.  The saved eflags sit at
	 * offset 8 in the original iret frame, plus ESP_OFFSET for the
	 * %eax pushed above; the extra +1 selects the byte holding IF,
	 * which is why the mask is X86_EFLAGS_IF>>8.
	 */
	testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)

	/*
	 * Maybe enable events.  Once this happens we could get a
	 * recursive event, so the critical region starts immediately
	 * afterwards.  However, if that happens we don't end up
	 * resuming the code, so we don't have to be worried about being
	 * preempted to another CPU.
	 */
	setz XEN_vcpu_info_mask(%eax)
xen_iret_start_crit:

	/*
	 * Check for unmasked and pending: the mask byte immediately
	 * follows the pending byte in vcpu_info, so a single 16-bit
	 * compare tests pending == 1 && mask == 0 at once.
	 */
	cmpw $0x0001, XEN_vcpu_info_pending(%eax)

	/*
	 * If there's something pending, mask events again so we can
	 * jump back into xen_hypervisor_callback
	 */
	sete XEN_vcpu_info_mask(%eax)

	popl %eax

	/*
	 * From this point on the registers are restored and the stack
	 * updated, so we don't need to worry about it if we're
	 * preempted
	 */
iret_restore_end:

	/*
	 * Jump to hypervisor_callback after fixing up the stack.
	 * Events are masked, so jumping out of the critical region is
	 * OK.  (ZF here is still the result of the cmpw above; neither
	 * sete nor popl touches the flags.)
	 */
	je xen_hypervisor_callback

1:	iret
xen_iret_end_crit:
.section __ex_table, "a"
	.align 4
	.long 1b, iret_exc
.previous

hyper_iret:
	/*
	 * Put this out of line since it's very rarely used.  Each
	 * hypercall stub occupies 32 bytes of the hypercall page, hence
	 * the __HYPERVISOR_iret * 32 offset.
	 */
	jmp hypercall_page + __HYPERVISOR_iret * 32

	.globl xen_iret_start_crit, xen_iret_end_crit

/*
 * This is called by xen_hypervisor_callback in entry.S when it sees
 * that the EIP at the time of interrupt was between
 * xen_iret_start_crit and xen_iret_end_crit.  We're passed the EIP in
 * %eax so we can do a more refined determination of what to do.
 *
 * The stack format at this point is:
 *	----------------
 *	 ss		: (ss/esp may be present if we came from usermode)
 *	 esp		:
 *	 eflags		}  outer exception info
 *	 cs		}
 *	 eip		}
 *	---------------- <- edi (copy dest)
 *	 eax		:  outer eax if it hasn't been restored
 *	----------------
 *	 eflags		}  nested exception info
 *	 cs		}  (no ss/esp because we're nested
 *	 eip		}   from the same ring)
 *	 orig_eax	}<- esi (copy src)
 *	 - - - - - - - -
 *	 fs		}
 *	 es		}
 *	 ds		}  SAVE_ALL state
 *	 eax		}
 *	  :		:
 *	 ebx		}<- esp
 *	----------------
 *
 * In order to deliver the nested exception properly, we need to shift
 * everything from the return addr up to the error code so it sits
 * just under the outer exception info.  This means that when we
 * handle the exception, we do it in the context of the outer
 * exception rather than starting a new one.
 *
 * The only caveat is that if the outer eax hasn't been restored yet
 * (i.e. it's still on the stack), we need to insert its value into
 * the SAVE_ALL state before going on, since it's usermode state which
 * we eventually need to restore.
 */
ENTRY(xen_iret_crit_fixup)
	/*
	 * Paranoia: Make sure we're really coming from kernel space.
	 * One could imagine a case where userspace jumps into the
	 * critical range address, but just before the CPU delivers a
	 * GP, it decides to deliver an interrupt instead.  Unlikely?
	 * Definitely.  Easy to avoid?  Yes.  The Intel documents
	 * explicitly say that the reported EIP for a bad jump is the
	 * jump instruction itself, not the destination, but some
	 * virtual environments get this wrong.
	 */
	movl PT_CS(%esp), %ecx
	andl $SEGMENT_RPL_MASK, %ecx
	cmpl $USER_RPL, %ecx
	je 2f			/* interrupted usermode: nothing to fix up */

	lea PT_ORIG_EAX(%esp), %esi	/* copy src (see diagram above) */
	lea PT_EFLAGS(%esp), %edi	/* copy dest (see diagram above) */

	/*
	 * If eip is before iret_restore_end then stack
	 * hasn't been restored yet.
	 */
	cmp $iret_restore_end, %eax
	jae 1f

	movl 0+4(%edi), %eax		/* copy EAX (just above top of frame) */
	movl %eax, PT_EAX(%esp)

	lea ESP_OFFSET(%edi), %edi	/* move dest up over saved regs */

	/* set up the copy */
1:	std				/* copy backwards: dest overlaps and lies above src */
	mov $PT_EIP / 4, %ecx		/* saved regs up to orig_eax: PT_EIP is the
					   byte offset of the saved eip, so /4 is
					   the number of dwords below it */
	rep movsl
	cld

	lea 4(%edi), %esp		/* point esp to new frame */
2:	jmp xen_do_upcall
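
/*
 * For context, an approximate sketch (reconstructed, not verbatim) of
 * the caller side in entry_32.S: xen_hypervisor_callback compares the
 * interrupted EIP against the critical-region bounds exported above
 * and only then branches here, with that EIP in %eax:
 *
 *	movl PT_EIP(%esp), %eax
 *	cmpl $xen_iret_start_crit, %eax
 *	jb   1f
 *	cmpl $xen_iret_end_crit, %eax
 *	jae  1f
 *	jmp  xen_iret_crit_fixup
 * 1:	mov  %esp, %eax
 *	call xen_evtchn_do_upcall
 *	jmp  ret_from_intr
 */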