/*
 * QEMU KVM support
 *
 * Copyright (C) 2006-2008 Qumranet Technologies
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#undef __user
#define __xuser  /* nothing */
#include <linux/kvm.h>

#include "qemu-common.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "cpu.h"
#include "exec/gdbstub.h"

#ifdef CONFIG_KVM_GS_RESTORE
#include "kvm-gs-restore.h"
#endif

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

#ifdef KVM_CAP_EXT_CPUID

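/* Ask KVM for its supported CPUID entries, providing room for at most 'max'
 * of them.  Returns NULL if the buffer was too small (the caller is expected
 * to retry with a larger 'max'); any other error is fatal.  The returned
 * buffer is allocated with g_malloc0() and must be released with g_free(). */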
static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
{
    struct kvm_cpuid2 *cpuid;
    int r, size;

    size = sizeof(*cpuid) + max * sizeof(*cpuid->entries);
    cpuid = (struct kvm_cpuid2 *)g_malloc0(size);
    cpuid->nent = max;
    r = kvm_ioctl(s, KVM_GET_SUPPORTED_CPUID, cpuid);
    if (r == 0 && cpuid->nent >= max) {
        r = -E2BIG;
    }
    if (r < 0) {
        if (r == -E2BIG) {
            g_free(cpuid);
            return NULL;
        } else {
            fprintf(stderr, "KVM_GET_SUPPORTED_CPUID failed: %s\n",
                    strerror(-r));
            exit(1);
        }
    }
    return cpuid;
}

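/* Return the value of register 'reg' (R_EAX..R_EDX) for CPUID leaf 'function'
 * as supported by the host KVM module.  The buffer passed to try_get_cpuid()
 * is grown by doubling until it is large enough.  For leaf 0x80000001 the EDX
 * output is augmented with the matching bits of leaf 1, since on Intel hosts
 * KVM reports that leaf per the Intel spec and would otherwise hide the
 * AMD-defined duplicates of the common feature bits. */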
uint32_t kvm_arch_get_supported_cpuid(CPUState *cpu, uint32_t function, int reg)
{
    struct kvm_cpuid2 *cpuid;
    int i, max;
    uint32_t ret = 0;
    uint32_t cpuid_1_edx;

    if (!kvm_check_extension(cpu->kvm_state, KVM_CAP_EXT_CPUID)) {
        return -1U;
    }

    max = 1;
    while ((cpuid = try_get_cpuid(cpu->kvm_state, max)) == NULL) {
        max *= 2;
    }

    for (i = 0; i < cpuid->nent; ++i) {
        if (cpuid->entries[i].function == function) {
            switch (reg) {
            case R_EAX:
                ret = cpuid->entries[i].eax;
                break;
            case R_EBX:
                ret = cpuid->entries[i].ebx;
                break;
            case R_ECX:
                ret = cpuid->entries[i].ecx;
                break;
            case R_EDX:
                ret = cpuid->entries[i].edx;
                if (function == 0x80000001) {
                    /* On Intel, kvm returns cpuid according to the Intel spec,
                     * so add missing bits according to the AMD spec:
                     */
                    cpuid_1_edx = kvm_arch_get_supported_cpuid(cpu, 1, R_EDX);
                    ret |= cpuid_1_edx & 0xdfeff7ff;
                }
                break;
            }
        }
    }

    g_free(cpuid);

    return ret;
}

#else

uint32_t kvm_arch_get_supported_cpuid(CPUState *cpu, uint32_t function, int reg)
{
    return -1U;
}

#endif

#ifndef KVM_MP_STATE_RUNNABLE
#define KVM_MP_STATE_RUNNABLE 0
#endif

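/* Build the CPUID table that the guest will see and hand it to KVM with
 * KVM_SET_CPUID2.  Leaf 2 is stateful (it must be read EAX[7:0] times), and
 * leaves 4, 0xb and 0xd take a significant index, so those are expanded into
 * one entry per sub-leaf.  KVM_CPUID_FLAG_SIGNIFCANT_INDEX is the spelling
 * used by the kernel headers. */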
int kvm_arch_init_vcpu(CPUState *cpu)
{
    struct {
        struct kvm_cpuid2 cpuid;
        struct kvm_cpuid_entry2 entries[100];
    } __attribute__((packed)) cpuid_data;
    uint32_t limit, i, j, cpuid_i;
    uint32_t unused;
    CPUX86State *env = cpu->env_ptr;

    env->mp_state = KVM_MP_STATE_RUNNABLE;

    cpuid_i = 0;

    cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);

    for (i = 0; i <= limit; i++) {
        struct kvm_cpuid_entry2 *c = &cpuid_data.entries[cpuid_i++];

        switch (i) {
        case 2: {
            /* Keep reading function 2 till all the input is received */
            int times;

            c->function = i;
            c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
                       KVM_CPUID_FLAG_STATE_READ_NEXT;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            times = c->eax & 0xff;

            for (j = 1; j < times; ++j) {
                c = &cpuid_data.entries[cpuid_i++];
                c->function = i;
                c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
                cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            }
            break;
        }
        case 4:
        case 0xb:
        case 0xd:
            for (j = 0; ; j++) {
                c->function = i;
                c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                c->index = j;
                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);

                if (i == 4 && c->eax == 0)
                    break;
                if (i == 0xb && !(c->ecx & 0xff00))
                    break;
                if (i == 0xd && c->eax == 0)
                    break;

                c = &cpuid_data.entries[cpuid_i++];
            }
            break;
        default:
            c->function = i;
            c->flags = 0;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            break;
        }
    }
    cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);

    for (i = 0x80000000; i <= limit; i++) {
        struct kvm_cpuid_entry2 *c = &cpuid_data.entries[cpuid_i++];

        c->function = i;
        c->flags = 0;
        cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
    }

    cpuid_data.cpuid.nent = cpuid_i;

    return kvm_vcpu_ioctl(cpu, KVM_SET_CPUID2, &cpuid_data);
}

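/* Returns 1 if the host KVM module lists MSR_STAR in KVM_GET_MSR_INDEX_LIST,
 * i.e. if it expects user space to save and restore that MSR.  The answer is
 * probed once and cached in a function-local static. */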
static int kvm_has_msr_star(CPUX86State *env)
{
    static int has_msr_star;
    int ret;
    CPUState *cpu = ENV_GET_CPU(env);

    /* first time */
    if (has_msr_star == 0) {
        struct kvm_msr_list msr_list, *kvm_msr_list;

        has_msr_star = -1;

        /* Obtain the MSR list from KVM.  These are the MSRs that we must
         * save/restore.  The first call only sizes the list, so KVM is
         * expected to fail it with -E2BIG while filling in msr_list.nmsrs. */
        msr_list.nmsrs = 0;
        ret = kvm_ioctl(cpu->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list);
        if (ret < 0 && ret != -E2BIG)
            return 0;

        kvm_msr_list = g_malloc0(sizeof(msr_list) +
                                 msr_list.nmsrs * sizeof(msr_list.indices[0]));

        kvm_msr_list->nmsrs = msr_list.nmsrs;
        ret = kvm_ioctl(cpu->kvm_state, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
        if (ret >= 0) {
            int i;

            for (i = 0; i < kvm_msr_list->nmsrs; i++) {
                if (kvm_msr_list->indices[i] == MSR_STAR) {
                    has_msr_star = 1;
                    break;
                }
            }
        }

        /* allocated with g_malloc0(), so release with g_free() */
        g_free(kvm_msr_list);
    }

    if (has_msr_star == 1)
        return 1;
    return 0;
}

int kvm_arch_init(KVMState *s, int smp_cpus)
{
    int ret;

    /* create vm86 tss.  KVM uses vm86 mode to emulate 16-bit code
     * directly.  In order to use vm86 mode, a TSS is needed.  Since this
     * must be part of guest physical memory, we need to allocate it.  Older
     * versions of KVM just assumed that it would be at the end of physical
     * memory but that doesn't work with more than 4GB of memory.  We simply
     * refuse to work with those older versions of KVM. */
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
    if (ret <= 0) {
        fprintf(stderr, "kvm does not support KVM_CAP_SET_TSS_ADDR\n");
        /* treat a missing capability (ret == 0) as an error too */
        return ret < 0 ? ret : -EINVAL;
    }

    /* This address is 3 pages before the bios, and the bios should present
     * it as unavailable memory.  FIXME: need to ensure the e820 map deals
     * with this?
     */
    return kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, 0xfffbd000);
}

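/* Helpers for converting between QEMU's SegmentCache (base/limit/selector plus
 * packed descriptor flags) and KVM's struct kvm_segment (one field per
 * attribute).  set_v8086_seg() installs the fixed attributes that virtual-8086
 * mode segments always have instead of translating the cached flags. */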
static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
{
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = 3;
    lhs->present = 1;
    lhs->dpl = 3;
    lhs->db = 0;
    lhs->s = 1;
    lhs->l = 0;
    lhs->g = 0;
    lhs->avl = 0;
    lhs->unusable = 0;
}

static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
{
    unsigned flags = rhs->flags;
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
    lhs->present = (flags & DESC_P_MASK) != 0;
    lhs->dpl = rhs->selector & 3;
    lhs->db = (flags >> DESC_B_SHIFT) & 1;
    lhs->s = (flags & DESC_S_MASK) != 0;
    lhs->l = (flags >> DESC_L_SHIFT) & 1;
    lhs->g = (flags & DESC_G_MASK) != 0;
    lhs->avl = (flags & DESC_AVL_MASK) != 0;
    lhs->unusable = 0;
}

static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
{
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->flags =
        (rhs->type << DESC_TYPE_SHIFT)
        | (rhs->present * DESC_P_MASK)
        | (rhs->dpl << DESC_DPL_SHIFT)
        | (rhs->db << DESC_B_SHIFT)
        | (rhs->s * DESC_S_MASK)
        | (rhs->l << DESC_L_SHIFT)
        | (rhs->g * DESC_G_MASK)
        | (rhs->avl * DESC_AVL_MASK);
}

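/* kvm_getput_reg() copies one register in the direction selected by 'set':
 * QEMU -> KVM when set is non-zero, KVM -> QEMU otherwise.  kvm_getput_regs()
 * uses it to synchronize the whole general-purpose register file with a
 * single KVM_GET_REGS / KVM_SET_REGS ioctl. */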
static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set)
{
    if (set)
        *kvm_reg = *qemu_reg;
    else
        *qemu_reg = *kvm_reg;
}

static int kvm_getput_regs(CPUX86State *env, int set)
{
    struct kvm_regs regs;
    int ret = 0;

    if (!set) {
        ret = kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_GET_REGS, &regs);
        if (ret < 0)
            return ret;
    }

    kvm_getput_reg(&regs.rax, &env->regs[R_EAX], set);
    kvm_getput_reg(&regs.rbx, &env->regs[R_EBX], set);
    kvm_getput_reg(&regs.rcx, &env->regs[R_ECX], set);
    kvm_getput_reg(&regs.rdx, &env->regs[R_EDX], set);
    kvm_getput_reg(&regs.rsi, &env->regs[R_ESI], set);
    kvm_getput_reg(&regs.rdi, &env->regs[R_EDI], set);
    kvm_getput_reg(&regs.rsp, &env->regs[R_ESP], set);
    kvm_getput_reg(&regs.rbp, &env->regs[R_EBP], set);
#ifdef TARGET_X86_64
    kvm_getput_reg(&regs.r8, &env->regs[8], set);
    kvm_getput_reg(&regs.r9, &env->regs[9], set);
    kvm_getput_reg(&regs.r10, &env->regs[10], set);
    kvm_getput_reg(&regs.r11, &env->regs[11], set);
    kvm_getput_reg(&regs.r12, &env->regs[12], set);
    kvm_getput_reg(&regs.r13, &env->regs[13], set);
    kvm_getput_reg(&regs.r14, &env->regs[14], set);
    kvm_getput_reg(&regs.r15, &env->regs[15], set);
#endif

    kvm_getput_reg(&regs.rflags, &env->eflags, set);
    kvm_getput_reg(&regs.rip, &env->eip, set);

    if (set)
        ret = kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_SET_REGS, &regs);

    return ret;
}

static int kvm_put_fpu(CPUX86State *env)
{
    struct kvm_fpu fpu;
    int i;

    memset(&fpu, 0, sizeof fpu);
    fpu.fsw = env->fpus & ~(7 << 11);
    fpu.fsw |= (env->fpstt & 7) << 11;
    fpu.fcw = env->fpuc;
    for (i = 0; i < 8; ++i)
        fpu.ftwx |= (!env->fptags[i]) << i;
    memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
    memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
    fpu.mxcsr = env->mxcsr;

    return kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_SET_FPU, &fpu);
}

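/* Upload segment registers, descriptor tables, control registers, the APIC
 * base/TPR and EFER to KVM.  In virtual-8086 mode the segments use the fixed
 * v8086 attributes; in protected mode the SS selector's RPL and DPL are forced
 * to CS's CPL so the saved state stays self-consistent. */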
static int kvm_put_sregs(CPUX86State *env)
{
    struct kvm_sregs sregs;

    memcpy(sregs.interrupt_bitmap,
           env->interrupt_bitmap,
           sizeof(sregs.interrupt_bitmap));

    if ((env->eflags & VM_MASK)) {
        set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
        set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
        set_v8086_seg(&sregs.es, &env->segs[R_ES]);
        set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
        set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
        set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
    } else {
        set_seg(&sregs.cs, &env->segs[R_CS]);
        set_seg(&sregs.ds, &env->segs[R_DS]);
        set_seg(&sregs.es, &env->segs[R_ES]);
        set_seg(&sregs.fs, &env->segs[R_FS]);
        set_seg(&sregs.gs, &env->segs[R_GS]);
        set_seg(&sregs.ss, &env->segs[R_SS]);

        if (env->cr[0] & CR0_PE_MASK) {
            /* force ss cpl to cs cpl */
            sregs.ss.selector = (sregs.ss.selector & ~3) |
                                (sregs.cs.selector & 3);
            sregs.ss.dpl = sregs.ss.selector & 3;
        }
    }

    set_seg(&sregs.tr, &env->tr);
    set_seg(&sregs.ldt, &env->ldt);

    sregs.idt.limit = env->idt.limit;
    sregs.idt.base = env->idt.base;
    sregs.gdt.limit = env->gdt.limit;
    sregs.gdt.base = env->gdt.base;

    sregs.cr0 = env->cr[0];
    sregs.cr2 = env->cr[2];
    sregs.cr3 = env->cr[3];
    sregs.cr4 = env->cr[4];

    sregs.cr8 = cpu_get_apic_tpr(env);
    sregs.apic_base = cpu_get_apic_base(env);

    sregs.efer = env->efer;

    return kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_SET_SREGS, &sregs);
}

static void kvm_msr_entry_set(struct kvm_msr_entry *entry,
                              uint32_t index, uint64_t value)
{
    entry->index = index;
    entry->data = value;
}

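/* Write the MSRs that QEMU tracks back to KVM in one KVM_SET_MSRS batch:
 * the SYSENTER MSRs, the TSC, MSR_STAR when the host supports it, and the
 * 64-bit syscall MSRs on TARGET_X86_64 builds. */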
static int kvm_put_msrs(CPUX86State *env)
{
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[100];
    } msr_data;
    struct kvm_msr_entry *msrs = msr_data.entries;
    int n = 0;

    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
    if (kvm_has_msr_star(env))
        kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
#ifdef TARGET_X86_64
    /* FIXME if lm capable */
    kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
    kvm_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
    kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
    kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
#endif
    msr_data.info.nmsrs = n;

    return kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_SET_MSRS, &msr_data);
}

static int kvm_get_fpu(CPUX86State *env)
{
    struct kvm_fpu fpu;
    int i, ret;

    ret = kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_GET_FPU, &fpu);
    if (ret < 0)
        return ret;

    env->fpstt = (fpu.fsw >> 11) & 7;
    env->fpus = fpu.fsw;
    env->fpuc = fpu.fcw;
    for (i = 0; i < 8; ++i)
        env->fptags[i] = !((fpu.ftwx >> i) & 1);
    memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
    memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs);
    env->mxcsr = fpu.mxcsr;

    return 0;
}

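/* Fetch segment and control register state from KVM and rebuild the parts of
 * env->hflags that are derived from it (CPL, PE/MP/EM/TS, LMA, CS/SS size and
 * address-size bits), preserving the unrelated hflags via HFLAG_COPY_MASK. */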
int kvm_get_sregs(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    struct kvm_sregs sregs;
    uint32_t hflags;
    int ret;

    ret = kvm_vcpu_ioctl(cpu, KVM_GET_SREGS, &sregs);
    if (ret < 0)
        return ret;

    memcpy(env->interrupt_bitmap,
           sregs.interrupt_bitmap,
           sizeof(sregs.interrupt_bitmap));

    get_seg(&env->segs[R_CS], &sregs.cs);
    get_seg(&env->segs[R_DS], &sregs.ds);
    get_seg(&env->segs[R_ES], &sregs.es);
    get_seg(&env->segs[R_FS], &sregs.fs);
    get_seg(&env->segs[R_GS], &sregs.gs);
    get_seg(&env->segs[R_SS], &sregs.ss);

    get_seg(&env->tr, &sregs.tr);
    get_seg(&env->ldt, &sregs.ldt);

    env->idt.limit = sregs.idt.limit;
    env->idt.base = sregs.idt.base;
    env->gdt.limit = sregs.gdt.limit;
    env->gdt.base = sregs.gdt.base;

    env->cr[0] = sregs.cr0;
    env->cr[2] = sregs.cr2;
    env->cr[3] = sregs.cr3;
    env->cr[4] = sregs.cr4;

    cpu_set_apic_base(env, sregs.apic_base);

    env->efer = sregs.efer;
    //cpu_set_apic_tpr(env, sregs.cr8);

#define HFLAG_COPY_MASK ~( \
    HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
    HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
    HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
    HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)

    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
    hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
    hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
              (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
    hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
    hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
              (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);

    if (env->efer & MSR_EFER_LMA) {
        hflags |= HF_LMA_MASK;
    }

    if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
        hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
    } else {
        hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
                  (DESC_B_SHIFT - HF_CS32_SHIFT);
        hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
                  (DESC_B_SHIFT - HF_SS32_SHIFT);
        if (!(env->cr[0] & CR0_PE_MASK) ||
            (env->eflags & VM_MASK) ||
            !(hflags & HF_CS32_MASK)) {
            hflags |= HF_ADDSEG_MASK;
        } else {
            hflags |= ((env->segs[R_DS].base |
                        env->segs[R_ES].base |
                        env->segs[R_SS].base) != 0) <<
                      HF_ADDSEG_SHIFT;
        }
    }
    env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;

    return 0;
}

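/* Read back the same MSR set that kvm_put_msrs() writes.  KVM_GET_MSRS
 * returns the number of entries it actually filled in, which is why the
 * result loop iterates up to 'ret' rather than 'n'. */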
static int kvm_get_msrs(CPUX86State *env)
{
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[100];
    } msr_data;
    struct kvm_msr_entry *msrs = msr_data.entries;
    int ret, i, n;

    n = 0;
    msrs[n++].index = MSR_IA32_SYSENTER_CS;
    msrs[n++].index = MSR_IA32_SYSENTER_ESP;
    msrs[n++].index = MSR_IA32_SYSENTER_EIP;
    if (kvm_has_msr_star(env))
        msrs[n++].index = MSR_STAR;
    msrs[n++].index = MSR_IA32_TSC;
#ifdef TARGET_X86_64
    /* FIXME lm_capable_kernel */
    msrs[n++].index = MSR_CSTAR;
    msrs[n++].index = MSR_KERNELGSBASE;
    msrs[n++].index = MSR_FMASK;
    msrs[n++].index = MSR_LSTAR;
#endif
    msr_data.info.nmsrs = n;
    ret = kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_GET_MSRS, &msr_data);
    if (ret < 0)
        return ret;

    for (i = 0; i < ret; i++) {
        switch (msrs[i].index) {
        case MSR_IA32_SYSENTER_CS:
            env->sysenter_cs = msrs[i].data;
            break;
        case MSR_IA32_SYSENTER_ESP:
            env->sysenter_esp = msrs[i].data;
            break;
        case MSR_IA32_SYSENTER_EIP:
            env->sysenter_eip = msrs[i].data;
            break;
        case MSR_STAR:
            env->star = msrs[i].data;
            break;
#ifdef TARGET_X86_64
        case MSR_CSTAR:
            env->cstar = msrs[i].data;
            break;
        case MSR_KERNELGSBASE:
            env->kernelgsbase = msrs[i].data;
            break;
        case MSR_FMASK:
            env->fmask = msrs[i].data;
            break;
        case MSR_LSTAR:
            env->lstar = msrs[i].data;
            break;
#endif
        case MSR_IA32_TSC:
            env->tsc = msrs[i].data;
            break;
        }
    }

    return 0;
}

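/* Full vCPU state synchronization, QEMU -> kernel.  Any ioctl failure aborts
 * the sequence and its error code is returned to the caller. */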
int kvm_arch_put_registers(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    int ret;

    ret = kvm_getput_regs(env, 1);
    if (ret < 0)
        return ret;

    ret = kvm_put_fpu(env);
    if (ret < 0)
        return ret;

    ret = kvm_put_sregs(env);
    if (ret < 0)
        return ret;

    ret = kvm_put_msrs(env);
    if (ret < 0)
        return ret;

    ret = kvm_put_mp_state(cpu);
    if (ret < 0)
        return ret;

    ret = kvm_get_mp_state(cpu);
    if (ret < 0)
        return ret;

    return 0;
}

int kvm_arch_get_registers(CPUState *cpu)
{
    int ret;
    CPUX86State *env = cpu->env_ptr;

    ret = kvm_getput_regs(env, 0);
    if (ret < 0)
        return ret;

    ret = kvm_get_fpu(env);
    if (ret < 0)
        return ret;

    ret = kvm_get_sregs(cpu);
    if (ret < 0)
        return ret;

    ret = kvm_get_msrs(env);
    if (ret < 0)
        return ret;

    return 0;
}

int kvm_arch_vcpu_run(CPUState *cpu)
{
#ifdef CONFIG_KVM_GS_RESTORE
    if (gs_need_restore != KVM_GS_RESTORE_NO)
        return no_gs_ioctl(cpu->kvm_fd, KVM_RUN, 0);
    else
#endif
        return kvm_vcpu_ioctl(cpu, KVM_RUN, 0);
}

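/* Called right before KVM_RUN.  Injects a pending external interrupt through
 * KVM_INTERRUPT when the guest can take it (ready_for_interrupt_injection and
 * IF set); otherwise requests an interrupt-window exit so control returns to
 * userspace as soon as injection becomes possible.  Also forwards the current
 * APIC TPR in run->cr8. */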
int kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
{
    CPUX86State *env = cpu->env_ptr;

    /* Try to inject an interrupt if the guest can accept it */
    if (run->ready_for_interrupt_injection &&
        (cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->eflags & IF_MASK)) {
        int irq;

        cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
        irq = cpu_get_pic_interrupt(env);
        if (irq >= 0) {
            struct kvm_interrupt intr;
            intr.irq = irq;
            /* FIXME: errors */
            dprintf("injected interrupt %d\n", irq);
            kvm_vcpu_ioctl(cpu, KVM_INTERRUPT, &intr);
        }
    }

    /* If we have an interrupt but the guest is not ready to receive an
     * interrupt, request an interrupt window exit.  This will
     * cause a return to userspace as soon as the guest is ready to
     * receive interrupts. */
    if ((cpu->interrupt_request & CPU_INTERRUPT_HARD))
        run->request_interrupt_window = 1;
    else
        run->request_interrupt_window = 0;

    dprintf("setting tpr\n");
    run->cr8 = cpu_get_apic_tpr(env);

#ifdef CONFIG_KVM_GS_RESTORE
    gs_base_pre_run();
#endif

    return 0;
}

int kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
    CPUX86State *env = cpu->env_ptr;
#ifdef CONFIG_KVM_GS_RESTORE
    gs_base_post_run();
#endif
    if (run->if_flag)
        env->eflags |= IF_MASK;
    else
        env->eflags &= ~IF_MASK;

    cpu_set_apic_tpr(env, run->cr8);
    cpu_set_apic_base(env, run->apic_base);

    return 0;
}

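/* KVM_EXIT_HLT handler: if no unmasked interrupt or NMI is pending, mark the
 * vCPU halted and report EXCP_HLT (return 0); otherwise return 1 so execution
 * simply resumes. */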
static int kvm_handle_halt(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;

    if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
          (env->eflags & IF_MASK)) &&
        !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->halted = 1;
        env->exception_index = EXCP_HLT;
        return 0;
    }

    return 1;
}

int kvm_arch_handle_exit(CPUState *cpu, struct kvm_run *run)
{
    int ret = 0;

    switch (run->exit_reason) {
    case KVM_EXIT_HLT:
        dprintf("handle_hlt\n");
        ret = kvm_handle_halt(cpu);
        break;
    }

    return ret;
}

#ifdef KVM_CAP_SET_GUEST_DEBUG
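/* Software breakpoints are implemented by patching the guest instruction
 * stream: insert saves the original byte in bp->saved_insn and writes an
 * int3 (0xcc) opcode; remove verifies the 0xcc is still in place before
 * restoring the saved byte. */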
int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
{
    static const uint8_t int3 = 0xcc;

    if (cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) ||
        cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&int3, 1, 1))
        return -EINVAL;
    return 0;
}

int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
{
    uint8_t int3;

    if (cpu_memory_rw_debug(cpu, bp->pc, &int3, 1, 0) || int3 != 0xcc ||
        cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1))
        return -EINVAL;
    return 0;
}

static struct {
    target_ulong addr;
    int len;
    int type;
} hw_breakpoint[4];

static int nb_hw_breakpoint;

static int find_hw_breakpoint(target_ulong addr, int len, int type)
{
    int n;

    for (n = 0; n < nb_hw_breakpoint; n++)
        if (hw_breakpoint[n].addr == addr && hw_breakpoint[n].type == type &&
            (hw_breakpoint[n].len == len || len == -1))
            return n;
    return -1;
}

int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    switch (type) {
    case GDB_BREAKPOINT_HW:
        len = 1;
        break;
    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_ACCESS:
        switch (len) {
        case 1:
            break;
        case 2:
        case 4:
        case 8:
            if (addr & (len - 1))
                return -EINVAL;
            break;
        default:
            return -EINVAL;
        }
        break;
    default:
        return -ENOSYS;
    }

    if (nb_hw_breakpoint == 4)
        return -ENOBUFS;

    if (find_hw_breakpoint(addr, len, type) >= 0)
        return -EEXIST;

    hw_breakpoint[nb_hw_breakpoint].addr = addr;
    hw_breakpoint[nb_hw_breakpoint].len = len;
    hw_breakpoint[nb_hw_breakpoint].type = type;
    nb_hw_breakpoint++;

    return 0;
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    int n;

    n = find_hw_breakpoint(addr, (type == GDB_BREAKPOINT_HW) ? 1 : len, type);
    if (n < 0)
        return -ENOENT;

    nb_hw_breakpoint--;
    hw_breakpoint[n] = hw_breakpoint[nb_hw_breakpoint];

    return 0;
}

void kvm_arch_remove_all_hw_breakpoints(void)
{
    nb_hw_breakpoint = 0;
}

static CPUWatchpoint hw_watchpoint;

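/* Decode a KVM debug exit.  DR6 bit 14 (BS) indicates a single-step trap,
 * which we claim only when single-stepping was requested; DR6 bits 0-3
 * identify which hardware breakpoint fired, and DR7's R/W field for that slot
 * distinguishes an execution breakpoint from a write or access watchpoint.
 * Debug exits we do not recognize are re-injected into the guest. */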
int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info)
{
    int handle = 0;
    int n;

    if (arch_info->exception == 1) {
        if (arch_info->dr6 & (1 << 14)) {
            if (current_cpu->singlestep_enabled)
                handle = 1;
        } else {
            for (n = 0; n < 4; n++)
                if (arch_info->dr6 & (1 << n))
                    switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) {
                    case 0x0:
                        handle = 1;
                        break;
                    case 0x1:
                        handle = 1;
                        cpu_single_env->watchpoint_hit = &hw_watchpoint;
                        hw_watchpoint.vaddr = hw_breakpoint[n].addr;
                        hw_watchpoint.flags = BP_MEM_WRITE;
                        break;
                    case 0x3:
                        handle = 1;
                        cpu_single_env->watchpoint_hit = &hw_watchpoint;
                        hw_watchpoint.vaddr = hw_breakpoint[n].addr;
                        hw_watchpoint.flags = BP_MEM_ACCESS;
                        break;
                    }
        }
    } else if (kvm_find_sw_breakpoint(current_cpu, arch_info->pc))
        handle = 1;

    if (!handle)
        kvm_update_guest_debug(current_cpu,
                               (arch_info->exception == 1) ?
                               KVM_GUESTDBG_INJECT_DB : KVM_GUESTDBG_INJECT_BP);

    return handle;
}

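/* Translate the breakpoint lists into the kvm_guest_debug control block:
 * software breakpoints only need KVM_GUESTDBG_USE_SW_BP, while hardware
 * breakpoints are programmed into DR0-DR3 with their global-enable, type and
 * length bits in DR7 (on top of the 0x0600 GE/reserved bits). */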
void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
{
    const uint8_t type_code[] = {
        [GDB_BREAKPOINT_HW] = 0x0,
        [GDB_WATCHPOINT_WRITE] = 0x1,
        [GDB_WATCHPOINT_ACCESS] = 0x3
    };
    const uint8_t len_code[] = {
        [1] = 0x0, [2] = 0x1, [4] = 0x3, [8] = 0x2
    };
    int n;

    if (kvm_sw_breakpoints_active(cpu))
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;

    if (nb_hw_breakpoint > 0) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        dbg->arch.debugreg[7] = 0x0600;
        for (n = 0; n < nb_hw_breakpoint; n++) {
            dbg->arch.debugreg[n] = hw_breakpoint[n].addr;
            dbg->arch.debugreg[7] |= (2 << (n * 2)) |
                (type_code[hw_breakpoint[n].type] << (16 + n*4)) |
                (len_code[hw_breakpoint[n].len] << (18 + n*4));
        }
    }
}
#endif /* KVM_CAP_SET_GUEST_DEBUG */