/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * derived from drivers/kvm/kvm_main.c
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */
16
Carsten Otte313a3dc2007-10-11 19:16:52 +020017#include "kvm.h"
Carsten Otte043405e2007-10-10 17:16:19 +020018#include "x86.h"
Carsten Otte5fb76f92007-10-29 16:08:51 +010019#include "segment_descriptor.h"
Carsten Otte313a3dc2007-10-11 19:16:52 +020020#include "irq.h"
21
22#include <linux/kvm.h>
23#include <linux/fs.h>
24#include <linux/vmalloc.h>
Carsten Otte5fb76f92007-10-29 16:08:51 +010025#include <linux/module.h>
Carsten Otte043405e2007-10-10 17:16:19 +020026
27#include <asm/uaccess.h>
28
Carsten Otte313a3dc2007-10-11 19:16:52 +020029#define MAX_IO_MSRS 256
30
/*
 * Return the linear base address of the segment named by @selector,
 * read from the host's current GDT (or, for LDT-relative selectors,
 * from the host's LDT).  A null selector yields base 0.
 */
unsigned long segment_base(u16 selector)
{
	struct descriptor_table gdt;
	struct segment_descriptor *d;
	unsigned long table_base;
	unsigned long v;

	if (selector == 0)
		return 0;

	/* Fetch the host GDT base/limit directly from the CPU. */
	asm("sgdt %0" : "=m"(gdt));
	table_base = gdt.base;

	if (selector & 4) { /* from ldt */
		u16 ldt_selector;

		/*
		 * The LDT is itself described by a GDT entry; recurse once
		 * (with an LDT selector whose TI bit is clear) to find it.
		 */
		asm("sldt %0" : "=g"(ldt_selector));
		table_base = segment_base(ldt_selector);
	}
	/* selector & ~7 is the byte offset of the 8-byte descriptor. */
	d = (struct segment_descriptor *)(table_base + (selector & ~7));
	v = d->base_low | ((unsigned long)d->base_mid << 16) |
		((unsigned long)d->base_high << 24);
#ifdef CONFIG_X86_64
	/*
	 * On x86-64, system descriptors (type 2 = LDT, 9/11 = TSS) are 16
	 * bytes wide and carry bits 63:32 of the base in the second half.
	 */
	if (d->system == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
		v |= ((unsigned long) \
		     ((struct segment_descriptor_64 *)d)->base_higher) << 32;
#endif
	return v;
}
EXPORT_SYMBOL_GPL(segment_base);
61
Carsten Otte6866b832007-10-29 16:09:10 +010062u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
63{
64 if (irqchip_in_kernel(vcpu->kvm))
65 return vcpu->apic_base;
66 else
67 return vcpu->apic_base;
68}
69EXPORT_SYMBOL_GPL(kvm_get_apic_base);
70
71void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
72{
73 /* TODO: reserve bits check */
74 if (irqchip_in_kernel(vcpu->kvm))
75 kvm_lapic_set_base(vcpu, data);
76 else
77 vcpu->apic_base = data;
78}
79EXPORT_SYMBOL_GPL(kvm_set_apic_base);
80
/*
 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
 *
 * This list is modified at module load time to reflect the
 * capabilities of the host cpu.
 */
static u32 msrs_to_save[] = {
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
	MSR_K6_STAR,
#ifdef CONFIG_X86_64
	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
	MSR_IA32_TIME_STAMP_COUNTER,
};

/*
 * Number of leading entries of msrs_to_save[] the host actually supports;
 * set once at module load by kvm_init_msr_list(), which compacts the array.
 */
static unsigned num_msrs_to_save;

/* MSRs kvm emulates itself, exposed regardless of host-CPU support. */
static u32 emulated_msrs[] = {
	MSR_IA32_MISC_ENABLE,
};
102
/*
 * Adapt set_msr() to msr_io()'s calling convention: msr_io() hands each
 * entry's value by pointer, while kvm_set_msr() takes it by value.
 * Returns kvm_set_msr()'s result (non-zero stops the msr_io() loop).
 */
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
	return kvm_set_msr(vcpu, index, *data);
}
110
111/*
112 * Read or write a bunch of msrs. All parameters are kernel addresses.
113 *
114 * @return number of msrs set successfully.
115 */
116static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
117 struct kvm_msr_entry *entries,
118 int (*do_msr)(struct kvm_vcpu *vcpu,
119 unsigned index, u64 *data))
120{
121 int i;
122
123 vcpu_load(vcpu);
124
125 for (i = 0; i < msrs->nmsrs; ++i)
126 if (do_msr(vcpu, entries[i].index, &entries[i].data))
127 break;
128
129 vcpu_put(vcpu);
130
131 return i;
132}
133
/*
 * Read or write a bunch of msrs.  All parameters except the vcpu come from
 * userspace; the entry array is staged through a vmalloc'ed kernel buffer.
 *
 * @writeback: non-zero for "get" requests, where the (possibly updated)
 *             entries must be copied back to userspace.
 *
 * @return number of msrs processed successfully, or a negative errno.
 */
static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
		  int (*do_msr)(struct kvm_vcpu *vcpu,
				unsigned index, u64 *data),
		  int writeback)
{
	struct kvm_msrs msrs;
	struct kvm_msr_entry *entries;
	int r, n;
	unsigned size;

	r = -EFAULT;
	if (copy_from_user(&msrs, user_msrs, sizeof msrs))
		goto out;

	/* Bound the request so the allocation below stays reasonable. */
	r = -E2BIG;
	if (msrs.nmsrs >= MAX_IO_MSRS)
		goto out;

	r = -ENOMEM;
	size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
	entries = vmalloc(size);
	if (!entries)
		goto out;

	r = -EFAULT;
	if (copy_from_user(entries, user_msrs->entries, size))
		goto out_free;

	/* __msr_io() returns how many entries were handled successfully. */
	r = n = __msr_io(vcpu, &msrs, entries, do_msr);
	if (r < 0)
		goto out_free;

	r = -EFAULT;
	if (writeback && copy_to_user(user_msrs->entries, entries, size))
		goto out_free;

	r = n;

out_free:
	vfree(entries);
out:
	return r;
}
182
Carsten Otte043405e2007-10-10 17:16:19 +0200183long kvm_arch_dev_ioctl(struct file *filp,
184 unsigned int ioctl, unsigned long arg)
185{
186 void __user *argp = (void __user *)arg;
187 long r;
188
189 switch (ioctl) {
190 case KVM_GET_MSR_INDEX_LIST: {
191 struct kvm_msr_list __user *user_msr_list = argp;
192 struct kvm_msr_list msr_list;
193 unsigned n;
194
195 r = -EFAULT;
196 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
197 goto out;
198 n = msr_list.nmsrs;
199 msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
200 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
201 goto out;
202 r = -E2BIG;
203 if (n < num_msrs_to_save)
204 goto out;
205 r = -EFAULT;
206 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
207 num_msrs_to_save * sizeof(u32)))
208 goto out;
209 if (copy_to_user(user_msr_list->indices
210 + num_msrs_to_save * sizeof(u32),
211 &emulated_msrs,
212 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
213 goto out;
214 r = 0;
215 break;
216 }
217 default:
218 r = -EINVAL;
219 }
220out:
221 return r;
222}
223
/* Arch hook: delegate "vcpu scheduled onto @cpu" to the vendor module. */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	kvm_x86_ops->vcpu_load(vcpu, cpu);
}
228
/* Arch hook: delegate "vcpu descheduled" to the vendor module. */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	kvm_x86_ops->vcpu_put(vcpu);
}
233
234static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
235{
236 u64 efer;
237 int i;
238 struct kvm_cpuid_entry *e, *entry;
239
240 rdmsrl(MSR_EFER, efer);
241 entry = NULL;
242 for (i = 0; i < vcpu->cpuid_nent; ++i) {
243 e = &vcpu->cpuid_entries[i];
244 if (e->function == 0x80000001) {
245 entry = e;
246 break;
247 }
248 }
249 if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) {
250 entry->edx &= ~(1 << 20);
251 printk(KERN_INFO "kvm: guest NX capability removed\n");
252 }
253}
254
255static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
256 struct kvm_cpuid *cpuid,
257 struct kvm_cpuid_entry __user *entries)
258{
259 int r;
260
261 r = -E2BIG;
262 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
263 goto out;
264 r = -EFAULT;
265 if (copy_from_user(&vcpu->cpuid_entries, entries,
266 cpuid->nent * sizeof(struct kvm_cpuid_entry)))
267 goto out;
268 vcpu->cpuid_nent = cpuid->nent;
269 cpuid_fix_nx_cap(vcpu);
270 return 0;
271
272out:
273 return r;
274}
275
/*
 * Copy the in-kernel local APIC register state out for userspace.
 * NOTE(review): copies sizeof(*s) bytes from vcpu->apic->regs — assumes
 * struct kvm_lapic_state is exactly the APIC register page; confirm
 * against the struct definition.  Caller must ensure apic is non-NULL
 * (i.e. irqchip_in_kernel).
 */
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
				    struct kvm_lapic_state *s)
{
	vcpu_load(vcpu);
	memcpy(s->regs, vcpu->apic->regs, sizeof *s);
	vcpu_put(vcpu);

	return 0;
}
285
/*
 * Load local APIC register state supplied by userspace into the in-kernel
 * APIC, then let the lapic code re-derive its internal state from the
 * registers.  NOTE(review): same sizeof(*s)-vs-regs assumption as
 * kvm_vcpu_ioctl_get_lapic(); caller must ensure apic is non-NULL.
 */
static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
				    struct kvm_lapic_state *s)
{
	vcpu_load(vcpu);
	memcpy(vcpu->apic->regs, s->regs, sizeof *s);
	kvm_apic_post_state_restore(vcpu);
	vcpu_put(vcpu);

	return 0;
}
296
297long kvm_arch_vcpu_ioctl(struct file *filp,
298 unsigned int ioctl, unsigned long arg)
299{
300 struct kvm_vcpu *vcpu = filp->private_data;
301 void __user *argp = (void __user *)arg;
302 int r;
303
304 switch (ioctl) {
305 case KVM_GET_LAPIC: {
306 struct kvm_lapic_state lapic;
307
308 memset(&lapic, 0, sizeof lapic);
309 r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic);
310 if (r)
311 goto out;
312 r = -EFAULT;
313 if (copy_to_user(argp, &lapic, sizeof lapic))
314 goto out;
315 r = 0;
316 break;
317 }
318 case KVM_SET_LAPIC: {
319 struct kvm_lapic_state lapic;
320
321 r = -EFAULT;
322 if (copy_from_user(&lapic, argp, sizeof lapic))
323 goto out;
324 r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);;
325 if (r)
326 goto out;
327 r = 0;
328 break;
329 }
330 case KVM_SET_CPUID: {
331 struct kvm_cpuid __user *cpuid_arg = argp;
332 struct kvm_cpuid cpuid;
333
334 r = -EFAULT;
335 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
336 goto out;
337 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
338 if (r)
339 goto out;
340 break;
341 }
342 case KVM_GET_MSRS:
343 r = msr_io(vcpu, argp, kvm_get_msr, 1);
344 break;
345 case KVM_SET_MSRS:
346 r = msr_io(vcpu, argp, do_set_msr, 0);
347 break;
348 default:
349 r = -EINVAL;
350 }
351out:
352 return r;
353}
354
Carsten Otte1fe779f2007-10-29 16:08:35 +0100355static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
356{
357 int ret;
358
359 if (addr > (unsigned int)(-3 * PAGE_SIZE))
360 return -1;
361 ret = kvm_x86_ops->set_tss_addr(kvm, addr);
362 return ret;
363}
364
365static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
366 u32 kvm_nr_mmu_pages)
367{
368 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
369 return -EINVAL;
370
371 mutex_lock(&kvm->lock);
372
373 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
374 kvm->n_requested_mmu_pages = kvm_nr_mmu_pages;
375
376 mutex_unlock(&kvm->lock);
377 return 0;
378}
379
/* Report the VM's current shadow-MMU page allocation count. */
static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
{
	return kvm->n_alloc_mmu_pages;
}
384
/*
 * Set a new alias region.  Aliases map a portion of physical memory into
 * another portion.  This is useful for memory windows, for example the PC
 * VGA region.
 */
static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
					 struct kvm_memory_alias *alias)
{
	int r, n;
	struct kvm_mem_alias *p;

	r = -EINVAL;
	/* General sanity checks */
	if (alias->memory_size & (PAGE_SIZE - 1))
		goto out;
	if (alias->guest_phys_addr & (PAGE_SIZE - 1))
		goto out;
	if (alias->slot >= KVM_ALIAS_SLOTS)
		goto out;
	/* Reject ranges whose end wraps around the address space. */
	if (alias->guest_phys_addr + alias->memory_size
	    < alias->guest_phys_addr)
		goto out;
	if (alias->target_phys_addr + alias->memory_size
	    < alias->target_phys_addr)
		goto out;

	mutex_lock(&kvm->lock);

	p = &kvm->aliases[alias->slot];
	p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
	p->npages = alias->memory_size >> PAGE_SHIFT;
	p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;

	/* naliases = index one past the last slot still in use. */
	for (n = KVM_ALIAS_SLOTS; n > 0; --n)
		if (kvm->aliases[n - 1].npages)
			break;
	kvm->naliases = n;

	/* Old translations may now be stale; rebuild shadow pages lazily. */
	kvm_mmu_zap_all(kvm);

	mutex_unlock(&kvm->lock);

	return 0;

out:
	return r;
}
432
433static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
434{
435 int r;
436
437 r = 0;
438 switch (chip->chip_id) {
439 case KVM_IRQCHIP_PIC_MASTER:
440 memcpy(&chip->chip.pic,
441 &pic_irqchip(kvm)->pics[0],
442 sizeof(struct kvm_pic_state));
443 break;
444 case KVM_IRQCHIP_PIC_SLAVE:
445 memcpy(&chip->chip.pic,
446 &pic_irqchip(kvm)->pics[1],
447 sizeof(struct kvm_pic_state));
448 break;
449 case KVM_IRQCHIP_IOAPIC:
450 memcpy(&chip->chip.ioapic,
451 ioapic_irqchip(kvm),
452 sizeof(struct kvm_ioapic_state));
453 break;
454 default:
455 r = -EINVAL;
456 break;
457 }
458 return r;
459}
460
461static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
462{
463 int r;
464
465 r = 0;
466 switch (chip->chip_id) {
467 case KVM_IRQCHIP_PIC_MASTER:
468 memcpy(&pic_irqchip(kvm)->pics[0],
469 &chip->chip.pic,
470 sizeof(struct kvm_pic_state));
471 break;
472 case KVM_IRQCHIP_PIC_SLAVE:
473 memcpy(&pic_irqchip(kvm)->pics[1],
474 &chip->chip.pic,
475 sizeof(struct kvm_pic_state));
476 break;
477 case KVM_IRQCHIP_IOAPIC:
478 memcpy(ioapic_irqchip(kvm),
479 &chip->chip.ioapic,
480 sizeof(struct kvm_ioapic_state));
481 break;
482 default:
483 r = -EINVAL;
484 break;
485 }
486 kvm_pic_update_irq(pic_irqchip(kvm));
487 return r;
488}
489
/*
 * Per-VM arch ioctls: TSS address, memory regions/aliases, shadow-MMU
 * sizing, in-kernel irqchip creation and state access, and IRQ injection.
 * Returns 0 (or a count, for KVM_GET_NR_MMU_PAGES) on success, negative
 * errno on failure; unknown ioctls fall through with r = -EINVAL.
 */
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	int r = -EINVAL;

	switch (ioctl) {
	case KVM_SET_TSS_ADDR:
		r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
		if (r < 0)
			goto out;
		break;
	case KVM_SET_MEMORY_REGION: {
		struct kvm_memory_region kvm_mem;
		struct kvm_userspace_memory_region kvm_userspace_mem;

		r = -EFAULT;
		if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
			goto out;
		/* Translate the legacy region layout to the userspace one. */
		kvm_userspace_mem.slot = kvm_mem.slot;
		kvm_userspace_mem.flags = kvm_mem.flags;
		kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
		kvm_userspace_mem.memory_size = kvm_mem.memory_size;
		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
		if (r)
			goto out;
		break;
	}
	case KVM_SET_NR_MMU_PAGES:
		r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
		if (r)
			goto out;
		break;
	case KVM_GET_NR_MMU_PAGES:
		/* Count is returned directly as the ioctl result. */
		r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
		break;
	case KVM_SET_MEMORY_ALIAS: {
		struct kvm_memory_alias alias;

		r = -EFAULT;
		if (copy_from_user(&alias, argp, sizeof alias))
			goto out;
		r = kvm_vm_ioctl_set_memory_alias(kvm, &alias);
		if (r)
			goto out;
		break;
	}
	case KVM_CREATE_IRQCHIP:
		r = -ENOMEM;
		kvm->vpic = kvm_create_pic(kvm);
		if (kvm->vpic) {
			r = kvm_ioapic_init(kvm);
			if (r) {
				/* Roll back the PIC if the IOAPIC failed. */
				kfree(kvm->vpic);
				kvm->vpic = NULL;
				goto out;
			}
		} else
			goto out;
		break;
	case KVM_IRQ_LINE: {
		struct kvm_irq_level irq_event;

		r = -EFAULT;
		if (copy_from_user(&irq_event, argp, sizeof irq_event))
			goto out;
		if (irqchip_in_kernel(kvm)) {
			mutex_lock(&kvm->lock);
			/* ISA IRQs (< 16) go to the PIC as well. */
			if (irq_event.irq < 16)
				kvm_pic_set_irq(pic_irqchip(kvm),
					irq_event.irq,
					irq_event.level);
			kvm_ioapic_set_irq(kvm->vioapic,
					irq_event.irq,
					irq_event.level);
			mutex_unlock(&kvm->lock);
			r = 0;
		}
		break;
	}
	case KVM_GET_IRQCHIP: {
		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
		struct kvm_irqchip chip;

		r = -EFAULT;
		if (copy_from_user(&chip, argp, sizeof chip))
			goto out;
		r = -ENXIO;
		if (!irqchip_in_kernel(kvm))
			goto out;
		r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
		if (r)
			goto out;
		r = -EFAULT;
		if (copy_to_user(argp, &chip, sizeof chip))
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_IRQCHIP: {
		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
		struct kvm_irqchip chip;

		r = -EFAULT;
		if (copy_from_user(&chip, argp, sizeof chip))
			goto out;
		r = -ENXIO;
		if (!irqchip_in_kernel(kvm))
			goto out;
		r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
		if (r)
			goto out;
		r = 0;
		break;
	}
	default:
		/* r stays -EINVAL for unrecognized ioctls. */
		;
	}
out:
	return r;
}
612
Carsten Otte043405e2007-10-10 17:16:19 +0200613static __init void kvm_init_msr_list(void)
614{
615 u32 dummy[2];
616 unsigned i, j;
617
618 for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
619 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
620 continue;
621 if (j < i)
622 msrs_to_save[j] = msrs_to_save[i];
623 j++;
624 }
625 num_msrs_to_save = j;
626}
627
/* Arch-side one-time init: build the host-supported MSR list. */
__init void kvm_arch_init(void)
{
	kvm_init_msr_list();
}