blob: 4166a08ce5007fca98793f255438905189df698c [file] [log] [blame]
Avi Kivity6aa8b732006-12-10 02:21:36 -08001/*
2 * Kernel-based Virtual Machine driver for Linux
3 *
4 * This module enables machines with Intel VT-x extensions to run virtual
5 * machines without emulation or binary translation.
6 *
7 * Copyright (C) 2006 Qumranet, Inc.
8 *
9 * Authors:
10 * Avi Kivity <avi@qumranet.com>
11 * Yaniv Kamay <yaniv@qumranet.com>
12 *
13 * This work is licensed under the terms of the GNU GPL, version 2. See
14 * the COPYING file in the top-level directory.
15 *
16 */
17
18#include "kvm.h"
Avi Kivitye4956062007-06-28 14:15:57 -040019#include "x86_emulate.h"
20#include "segment_descriptor.h"
Avi Kivity6aa8b732006-12-10 02:21:36 -080021
22#include <linux/kvm.h>
23#include <linux/module.h>
24#include <linux/errno.h>
Avi Kivity6aa8b732006-12-10 02:21:36 -080025#include <linux/percpu.h>
26#include <linux/gfp.h>
Avi Kivity6aa8b732006-12-10 02:21:36 -080027#include <linux/mm.h>
28#include <linux/miscdevice.h>
29#include <linux/vmalloc.h>
Avi Kivity6aa8b732006-12-10 02:21:36 -080030#include <linux/reboot.h>
Avi Kivity6aa8b732006-12-10 02:21:36 -080031#include <linux/debugfs.h>
32#include <linux/highmem.h>
33#include <linux/file.h>
Avi Kivity59ae6c62007-02-12 00:54:48 -080034#include <linux/sysdev.h>
Avi Kivity774c47f2007-02-12 00:54:47 -080035#include <linux/cpu.h>
Alexey Dobriyane8edc6e2007-05-21 01:22:52 +040036#include <linux/sched.h>
Avi Kivityd9e368d2007-06-07 19:18:30 +030037#include <linux/cpumask.h>
38#include <linux/smp.h>
Avi Kivityd6d28162007-06-28 08:38:16 -040039#include <linux/anon_inodes.h>
Avi Kivity6aa8b732006-12-10 02:21:36 -080040
Avi Kivitye4956062007-06-28 14:15:57 -040041#include <asm/processor.h>
42#include <asm/msr.h>
43#include <asm/io.h>
44#include <asm/uaccess.h>
45#include <asm/desc.h>
Avi Kivity6aa8b732006-12-10 02:21:36 -080046
47MODULE_AUTHOR("Qumranet");
48MODULE_LICENSE("GPL");
49
Avi Kivity133de902007-02-12 00:54:44 -080050static DEFINE_SPINLOCK(kvm_lock);
51static LIST_HEAD(vm_list);
52
Avi Kivity1b6c0162007-05-24 13:03:52 +030053static cpumask_t cpus_hardware_enabled;
54
Avi Kivity6aa8b732006-12-10 02:21:36 -080055struct kvm_arch_ops *kvm_arch_ops;
Rusty Russellc16f8622007-07-30 21:12:19 +100056struct kmem_cache *kvm_vcpu_cache;
57EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
Avi Kivity1165f5f2007-04-19 17:27:43 +030058
Avi Kivity15ad7142007-07-11 18:17:21 +030059static __read_mostly struct preempt_ops kvm_preempt_ops;
60
Avi Kivity1165f5f2007-04-19 17:27:43 +030061#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
Avi Kivity6aa8b732006-12-10 02:21:36 -080062
63static struct kvm_stats_debugfs_item {
64 const char *name;
Avi Kivity1165f5f2007-04-19 17:27:43 +030065 int offset;
Avi Kivity6aa8b732006-12-10 02:21:36 -080066 struct dentry *dentry;
67} debugfs_entries[] = {
Avi Kivity1165f5f2007-04-19 17:27:43 +030068 { "pf_fixed", STAT_OFFSET(pf_fixed) },
69 { "pf_guest", STAT_OFFSET(pf_guest) },
70 { "tlb_flush", STAT_OFFSET(tlb_flush) },
71 { "invlpg", STAT_OFFSET(invlpg) },
72 { "exits", STAT_OFFSET(exits) },
73 { "io_exits", STAT_OFFSET(io_exits) },
74 { "mmio_exits", STAT_OFFSET(mmio_exits) },
75 { "signal_exits", STAT_OFFSET(signal_exits) },
76 { "irq_window", STAT_OFFSET(irq_window_exits) },
77 { "halt_exits", STAT_OFFSET(halt_exits) },
78 { "request_irq", STAT_OFFSET(request_irq_exits) },
79 { "irq_exits", STAT_OFFSET(irq_exits) },
Avi Kivitye6adf282007-04-30 16:07:54 +030080 { "light_exits", STAT_OFFSET(light_exits) },
Eddie Dong2cc51562007-05-21 07:28:09 +030081 { "efer_reload", STAT_OFFSET(efer_reload) },
Avi Kivity1165f5f2007-04-19 17:27:43 +030082 { NULL }
Avi Kivity6aa8b732006-12-10 02:21:36 -080083};
84
85static struct dentry *debugfs_dir;
86
#define MAX_IO_MSRS 256

/*
 * Reserved-bit masks for the guest control registers.  Each mask is the
 * complement of the flags listed in it; the set_crN() functions below
 * inject #GP when a guest value has any bit of the mask set.
 */
#define CR0_RESERVED_BITS						\
	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM		\
			  | X86_CR0_TS | X86_CR0_ET | X86_CR0_NE	\
			  | X86_CR0_WP | X86_CR0_AM | X86_CR0_NW	\
			  | X86_CR0_CD | X86_CR0_PG))

#define CR4_RESERVED_BITS						\
	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD	\
			  | X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE	\
			  | X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE	\
			  | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT	\
			  | X86_CR4_VMXE))

#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)

#define EFER_RESERVED_BITS 0xfffffffffffff2fe
101
#ifdef CONFIG_X86_64
/*
 * LDT or TSS descriptor in the GDT (16 bytes): an ordinary segment
 * descriptor followed by the upper 32 bits of the base and a pad word.
 */
struct segment_descriptor_64 {
	struct segment_descriptor s;
	u32 base_higher;
	u32 pad_zero;
};

#endif

/* Defined later in this file; declared here for earlier users. */
static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
			   unsigned long arg);
114
Avi Kivity6aa8b732006-12-10 02:21:36 -0800115unsigned long segment_base(u16 selector)
116{
117 struct descriptor_table gdt;
118 struct segment_descriptor *d;
119 unsigned long table_base;
120 typedef unsigned long ul;
121 unsigned long v;
122
123 if (selector == 0)
124 return 0;
125
126 asm ("sgdt %0" : "=m"(gdt));
127 table_base = gdt.base;
128
129 if (selector & 4) { /* from ldt */
130 u16 ldt_selector;
131
132 asm ("sldt %0" : "=g"(ldt_selector));
133 table_base = segment_base(ldt_selector);
134 }
135 d = (struct segment_descriptor *)(table_base + (selector & ~7));
136 v = d->base_low | ((ul)d->base_mid << 16) | ((ul)d->base_high << 24);
Avi Kivity05b3e0c2006-12-13 00:33:45 -0800137#ifdef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -0800138 if (d->system == 0
139 && (d->type == 2 || d->type == 9 || d->type == 11))
140 v |= ((ul)((struct segment_descriptor_64 *)d)->base_higher) << 32;
141#endif
142 return v;
143}
144EXPORT_SYMBOL_GPL(segment_base);
145
James Morris5aacf0c2006-12-22 01:04:55 -0800146static inline int valid_vcpu(int n)
147{
148 return likely(n >= 0 && n < KVM_MAX_VCPUS);
149}
150
Avi Kivity7702fd12007-06-14 16:27:40 +0300151void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
152{
153 if (!vcpu->fpu_active || vcpu->guest_fpu_loaded)
154 return;
155
156 vcpu->guest_fpu_loaded = 1;
157 fx_save(vcpu->host_fx_image);
158 fx_restore(vcpu->guest_fx_image);
159}
160EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);
161
162void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
163{
164 if (!vcpu->guest_fpu_loaded)
165 return;
166
167 vcpu->guest_fpu_loaded = 0;
168 fx_save(vcpu->guest_fx_image);
169 fx_restore(vcpu->host_fx_image);
170}
171EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
172
Avi Kivity6aa8b732006-12-10 02:21:36 -0800173/*
174 * Switches to specified vcpu, until a matching vcpu_put()
175 */
Avi Kivitybccf2152007-02-21 18:04:26 +0200176static void vcpu_load(struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -0800177{
Avi Kivity15ad7142007-07-11 18:17:21 +0300178 int cpu;
179
Avi Kivitybccf2152007-02-21 18:04:26 +0200180 mutex_lock(&vcpu->mutex);
Avi Kivity15ad7142007-07-11 18:17:21 +0300181 cpu = get_cpu();
182 preempt_notifier_register(&vcpu->preempt_notifier);
183 kvm_arch_ops->vcpu_load(vcpu, cpu);
184 put_cpu();
Avi Kivitybccf2152007-02-21 18:04:26 +0200185}
186
Avi Kivity6aa8b732006-12-10 02:21:36 -0800187static void vcpu_put(struct kvm_vcpu *vcpu)
188{
Avi Kivity15ad7142007-07-11 18:17:21 +0300189 preempt_disable();
Avi Kivity6aa8b732006-12-10 02:21:36 -0800190 kvm_arch_ops->vcpu_put(vcpu);
Avi Kivity15ad7142007-07-11 18:17:21 +0300191 preempt_notifier_unregister(&vcpu->preempt_notifier);
192 preempt_enable();
Avi Kivity6aa8b732006-12-10 02:21:36 -0800193 mutex_unlock(&vcpu->mutex);
194}
195
Avi Kivityd9e368d2007-06-07 19:18:30 +0300196static void ack_flush(void *_completed)
197{
198 atomic_t *completed = _completed;
199
200 atomic_inc(completed);
201}
202
203void kvm_flush_remote_tlbs(struct kvm *kvm)
204{
205 int i, cpu, needed;
206 cpumask_t cpus;
207 struct kvm_vcpu *vcpu;
208 atomic_t completed;
209
210 atomic_set(&completed, 0);
211 cpus_clear(cpus);
212 needed = 0;
Rusty Russellfb3f0f52007-07-27 17:16:56 +1000213 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
214 vcpu = kvm->vcpus[i];
215 if (!vcpu)
216 continue;
Avi Kivityd9e368d2007-06-07 19:18:30 +0300217 if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests))
218 continue;
219 cpu = vcpu->cpu;
220 if (cpu != -1 && cpu != raw_smp_processor_id())
221 if (!cpu_isset(cpu, cpus)) {
222 cpu_set(cpu, cpus);
223 ++needed;
224 }
225 }
226
227 /*
228 * We really want smp_call_function_mask() here. But that's not
229 * available, so ipi all cpus in parallel and wait for them
230 * to complete.
231 */
232 for (cpu = first_cpu(cpus); cpu != NR_CPUS; cpu = next_cpu(cpu, cpus))
233 smp_call_function_single(cpu, ack_flush, &completed, 1, 0);
234 while (atomic_read(&completed) != needed) {
235 cpu_relax();
236 barrier();
237 }
238}
239
Rusty Russellfb3f0f52007-07-27 17:16:56 +1000240int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
241{
242 struct page *page;
243 int r;
244
245 mutex_init(&vcpu->mutex);
246 vcpu->cpu = -1;
247 vcpu->mmu.root_hpa = INVALID_PAGE;
248 vcpu->kvm = kvm;
249 vcpu->vcpu_id = id;
250
251 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
252 if (!page) {
253 r = -ENOMEM;
254 goto fail;
255 }
256 vcpu->run = page_address(page);
257
258 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
259 if (!page) {
260 r = -ENOMEM;
261 goto fail_free_run;
262 }
263 vcpu->pio_data = page_address(page);
264
265 vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf,
266 FX_IMAGE_ALIGN);
267 vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE;
268
269 r = kvm_mmu_create(vcpu);
270 if (r < 0)
271 goto fail_free_pio_data;
272
273 return 0;
274
275fail_free_pio_data:
276 free_page((unsigned long)vcpu->pio_data);
277fail_free_run:
278 free_page((unsigned long)vcpu->run);
279fail:
280 return -ENOMEM;
281}
282EXPORT_SYMBOL_GPL(kvm_vcpu_init);
283
284void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
285{
286 kvm_mmu_destroy(vcpu);
287 free_page((unsigned long)vcpu->pio_data);
288 free_page((unsigned long)vcpu->run);
289}
290EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
291
Avi Kivityf17abe92007-02-21 19:28:04 +0200292static struct kvm *kvm_create_vm(void)
Avi Kivity6aa8b732006-12-10 02:21:36 -0800293{
294 struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800295
296 if (!kvm)
Avi Kivityf17abe92007-02-21 19:28:04 +0200297 return ERR_PTR(-ENOMEM);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800298
Eddie Dong74906342007-06-19 18:05:03 +0300299 kvm_io_bus_init(&kvm->pio_bus);
Shaohua Li11ec2802007-07-23 14:51:37 +0800300 mutex_init(&kvm->lock);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800301 INIT_LIST_HEAD(&kvm->active_mmu_pages);
Gregory Haskins2eeb2e92007-05-31 14:08:53 -0400302 kvm_io_bus_init(&kvm->mmio_bus);
Rusty Russell5e58cfe2007-07-23 17:08:21 +1000303 spin_lock(&kvm_lock);
304 list_add(&kvm->vm_list, &vm_list);
305 spin_unlock(&kvm_lock);
Avi Kivityf17abe92007-02-21 19:28:04 +0200306 return kvm;
307}
308
/* Open handler: nothing to set up, always succeeds. */
static int kvm_dev_open(struct inode *inode, struct file *filp)
{
	return 0;
}
313
314/*
315 * Free any memory in @free but not in @dont.
316 */
317static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
318 struct kvm_memory_slot *dont)
319{
320 int i;
321
322 if (!dont || free->phys_mem != dont->phys_mem)
323 if (free->phys_mem) {
324 for (i = 0; i < free->npages; ++i)
Avi Kivity55a54f72006-12-29 16:49:58 -0800325 if (free->phys_mem[i])
326 __free_page(free->phys_mem[i]);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800327 vfree(free->phys_mem);
328 }
329
330 if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
331 vfree(free->dirty_bitmap);
332
Al Viro8b6d44c2007-02-09 16:38:40 +0000333 free->phys_mem = NULL;
Avi Kivity6aa8b732006-12-10 02:21:36 -0800334 free->npages = 0;
Al Viro8b6d44c2007-02-09 16:38:40 +0000335 free->dirty_bitmap = NULL;
Avi Kivity6aa8b732006-12-10 02:21:36 -0800336}
337
338static void kvm_free_physmem(struct kvm *kvm)
339{
340 int i;
341
342 for (i = 0; i < kvm->nmemslots; ++i)
Al Viro8b6d44c2007-02-09 16:38:40 +0000343 kvm_free_physmem_slot(&kvm->memslots[i], NULL);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800344}
345
Avi Kivity039576c2007-03-20 12:46:50 +0200346static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
347{
348 int i;
349
Rusty Russell3077c4512007-07-30 16:41:57 +1000350 for (i = 0; i < ARRAY_SIZE(vcpu->pio.guest_pages); ++i)
Avi Kivity039576c2007-03-20 12:46:50 +0200351 if (vcpu->pio.guest_pages[i]) {
352 __free_page(vcpu->pio.guest_pages[i]);
353 vcpu->pio.guest_pages[i] = NULL;
354 }
355}
356
/* Unload @vcpu's mmu, bracketed by vcpu_load()/vcpu_put(). */
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
{
	vcpu_load(vcpu);

	kvm_mmu_unload(vcpu);

	vcpu_put(vcpu);
}
363
Avi Kivity6aa8b732006-12-10 02:21:36 -0800364static void kvm_free_vcpus(struct kvm *kvm)
365{
366 unsigned int i;
367
Avi Kivity7b53aa52007-06-05 12:17:03 +0300368 /*
369 * Unpin any mmu pages first.
370 */
371 for (i = 0; i < KVM_MAX_VCPUS; ++i)
Rusty Russellfb3f0f52007-07-27 17:16:56 +1000372 if (kvm->vcpus[i])
373 kvm_unload_vcpu_mmu(kvm->vcpus[i]);
374 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
375 if (kvm->vcpus[i]) {
376 kvm_arch_ops->vcpu_free(kvm->vcpus[i]);
377 kvm->vcpus[i] = NULL;
378 }
379 }
380
Avi Kivity6aa8b732006-12-10 02:21:36 -0800381}
382
/* Release handler: nothing to tear down, always succeeds. */
static int kvm_dev_release(struct inode *inode, struct file *filp)
{
	return 0;
}
Avi Kivity6aa8b732006-12-10 02:21:36 -0800387
Avi Kivityf17abe92007-02-21 19:28:04 +0200388static void kvm_destroy_vm(struct kvm *kvm)
389{
Avi Kivity133de902007-02-12 00:54:44 -0800390 spin_lock(&kvm_lock);
391 list_del(&kvm->vm_list);
392 spin_unlock(&kvm_lock);
Eddie Dong74906342007-06-19 18:05:03 +0300393 kvm_io_bus_destroy(&kvm->pio_bus);
Gregory Haskins2eeb2e92007-05-31 14:08:53 -0400394 kvm_io_bus_destroy(&kvm->mmio_bus);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800395 kvm_free_vcpus(kvm);
396 kvm_free_physmem(kvm);
397 kfree(kvm);
Avi Kivityf17abe92007-02-21 19:28:04 +0200398}
399
400static int kvm_vm_release(struct inode *inode, struct file *filp)
401{
402 struct kvm *kvm = filp->private_data;
403
404 kvm_destroy_vm(kvm);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800405 return 0;
406}
407
408static void inject_gp(struct kvm_vcpu *vcpu)
409{
410 kvm_arch_ops->inject_gp(vcpu, 0);
411}
412
Avi Kivity1342d352007-01-05 16:36:39 -0800413/*
414 * Load the pae pdptrs. Return true is they are all valid.
415 */
416static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
Avi Kivity6aa8b732006-12-10 02:21:36 -0800417{
418 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
Avi Kivity1342d352007-01-05 16:36:39 -0800419 unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
Avi Kivity6aa8b732006-12-10 02:21:36 -0800420 int i;
Avi Kivity6aa8b732006-12-10 02:21:36 -0800421 u64 *pdpt;
Avi Kivity1342d352007-01-05 16:36:39 -0800422 int ret;
Avi Kivity954bbbc22007-03-30 14:02:32 +0300423 struct page *page;
Rusty Russellc820c2a2007-07-25 13:29:51 +1000424 u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)];
Avi Kivity6aa8b732006-12-10 02:21:36 -0800425
Shaohua Li11ec2802007-07-23 14:51:37 +0800426 mutex_lock(&vcpu->kvm->lock);
Avi Kivity954bbbc22007-03-30 14:02:32 +0300427 page = gfn_to_page(vcpu->kvm, pdpt_gfn);
Rusty Russellc820c2a2007-07-25 13:29:51 +1000428 if (!page) {
429 ret = 0;
430 goto out;
431 }
Avi Kivity6aa8b732006-12-10 02:21:36 -0800432
Rusty Russellc820c2a2007-07-25 13:29:51 +1000433 pdpt = kmap_atomic(page, KM_USER0);
434 memcpy(pdpte, pdpt+offset, sizeof(pdpte));
435 kunmap_atomic(pdpt, KM_USER0);
436
437 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
438 if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
Avi Kivity1342d352007-01-05 16:36:39 -0800439 ret = 0;
440 goto out;
441 }
Avi Kivity6aa8b732006-12-10 02:21:36 -0800442 }
Rusty Russellc820c2a2007-07-25 13:29:51 +1000443 ret = 1;
Avi Kivity6aa8b732006-12-10 02:21:36 -0800444
Rusty Russellc820c2a2007-07-25 13:29:51 +1000445 memcpy(vcpu->pdptrs, pdpte, sizeof(vcpu->pdptrs));
Avi Kivity1342d352007-01-05 16:36:39 -0800446out:
Shaohua Li11ec2802007-07-23 14:51:37 +0800447 mutex_unlock(&vcpu->kvm->lock);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800448
Avi Kivity1342d352007-01-05 16:36:39 -0800449 return ret;
Avi Kivity6aa8b732006-12-10 02:21:36 -0800450}
451
452void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
453{
Rusty Russell707d92f2007-07-17 23:19:08 +1000454 if (cr0 & CR0_RESERVED_BITS) {
Avi Kivity6aa8b732006-12-10 02:21:36 -0800455 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
456 cr0, vcpu->cr0);
457 inject_gp(vcpu);
458 return;
459 }
460
Rusty Russell707d92f2007-07-17 23:19:08 +1000461 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
Avi Kivity6aa8b732006-12-10 02:21:36 -0800462 printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
463 inject_gp(vcpu);
464 return;
465 }
466
Rusty Russell707d92f2007-07-17 23:19:08 +1000467 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
Avi Kivity6aa8b732006-12-10 02:21:36 -0800468 printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
469 "and a clear PE flag\n");
470 inject_gp(vcpu);
471 return;
472 }
473
Rusty Russell707d92f2007-07-17 23:19:08 +1000474 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
Avi Kivity05b3e0c2006-12-13 00:33:45 -0800475#ifdef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -0800476 if ((vcpu->shadow_efer & EFER_LME)) {
477 int cs_db, cs_l;
478
479 if (!is_pae(vcpu)) {
480 printk(KERN_DEBUG "set_cr0: #GP, start paging "
481 "in long mode while PAE is disabled\n");
482 inject_gp(vcpu);
483 return;
484 }
485 kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
486 if (cs_l) {
487 printk(KERN_DEBUG "set_cr0: #GP, start paging "
488 "in long mode while CS.L == 1\n");
489 inject_gp(vcpu);
490 return;
491
492 }
493 } else
494#endif
Avi Kivity1342d352007-01-05 16:36:39 -0800495 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->cr3)) {
Avi Kivity6aa8b732006-12-10 02:21:36 -0800496 printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
497 "reserved bits\n");
498 inject_gp(vcpu);
499 return;
500 }
501
502 }
503
504 kvm_arch_ops->set_cr0(vcpu, cr0);
505 vcpu->cr0 = cr0;
506
Shaohua Li11ec2802007-07-23 14:51:37 +0800507 mutex_lock(&vcpu->kvm->lock);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800508 kvm_mmu_reset_context(vcpu);
Shaohua Li11ec2802007-07-23 14:51:37 +0800509 mutex_unlock(&vcpu->kvm->lock);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800510 return;
511}
512EXPORT_SYMBOL_GPL(set_cr0);
513
514void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
515{
516 set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f));
517}
518EXPORT_SYMBOL_GPL(lmsw);
519
520void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
521{
Rusty Russell66aee912007-07-17 23:34:16 +1000522 if (cr4 & CR4_RESERVED_BITS) {
Avi Kivity6aa8b732006-12-10 02:21:36 -0800523 printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
524 inject_gp(vcpu);
525 return;
526 }
527
Avi Kivitya9058ec2006-12-29 16:49:37 -0800528 if (is_long_mode(vcpu)) {
Rusty Russell66aee912007-07-17 23:34:16 +1000529 if (!(cr4 & X86_CR4_PAE)) {
Avi Kivity6aa8b732006-12-10 02:21:36 -0800530 printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
531 "in long mode\n");
532 inject_gp(vcpu);
533 return;
534 }
Rusty Russell66aee912007-07-17 23:34:16 +1000535 } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE)
Avi Kivity1342d352007-01-05 16:36:39 -0800536 && !load_pdptrs(vcpu, vcpu->cr3)) {
Avi Kivity6aa8b732006-12-10 02:21:36 -0800537 printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
538 inject_gp(vcpu);
Rusty Russell310bc762007-07-23 17:11:02 +1000539 return;
Avi Kivity6aa8b732006-12-10 02:21:36 -0800540 }
541
Rusty Russell66aee912007-07-17 23:34:16 +1000542 if (cr4 & X86_CR4_VMXE) {
Avi Kivity6aa8b732006-12-10 02:21:36 -0800543 printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
544 inject_gp(vcpu);
545 return;
546 }
547 kvm_arch_ops->set_cr4(vcpu, cr4);
Shaohua Li11ec2802007-07-23 14:51:37 +0800548 mutex_lock(&vcpu->kvm->lock);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800549 kvm_mmu_reset_context(vcpu);
Shaohua Li11ec2802007-07-23 14:51:37 +0800550 mutex_unlock(&vcpu->kvm->lock);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800551}
552EXPORT_SYMBOL_GPL(set_cr4);
553
554void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
555{
Avi Kivitya9058ec2006-12-29 16:49:37 -0800556 if (is_long_mode(vcpu)) {
Rusty Russellf802a302007-07-17 23:32:55 +1000557 if (cr3 & CR3_L_MODE_RESERVED_BITS) {
Avi Kivity6aa8b732006-12-10 02:21:36 -0800558 printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
559 inject_gp(vcpu);
560 return;
561 }
562 } else {
Rusty Russellf802a302007-07-17 23:32:55 +1000563 if (is_pae(vcpu)) {
564 if (cr3 & CR3_PAE_RESERVED_BITS) {
565 printk(KERN_DEBUG
566 "set_cr3: #GP, reserved bits\n");
567 inject_gp(vcpu);
568 return;
569 }
570 if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
571 printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
572 "reserved bits\n");
573 inject_gp(vcpu);
574 return;
575 }
576 } else {
577 if (cr3 & CR3_NONPAE_RESERVED_BITS) {
578 printk(KERN_DEBUG
579 "set_cr3: #GP, reserved bits\n");
580 inject_gp(vcpu);
581 return;
582 }
Avi Kivity6aa8b732006-12-10 02:21:36 -0800583 }
584 }
585
586 vcpu->cr3 = cr3;
Shaohua Li11ec2802007-07-23 14:51:37 +0800587 mutex_lock(&vcpu->kvm->lock);
Ingo Molnard21225e2007-01-05 16:36:59 -0800588 /*
589 * Does the new cr3 value map to physical memory? (Note, we
590 * catch an invalid cr3 even in real-mode, because it would
591 * cause trouble later on when we turn on paging anyway.)
592 *
593 * A real CPU would silently accept an invalid cr3 and would
594 * attempt to use it - with largely undefined (and often hard
595 * to debug) behavior on the guest side.
596 */
597 if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
598 inject_gp(vcpu);
599 else
600 vcpu->mmu.new_cr3(vcpu);
Shaohua Li11ec2802007-07-23 14:51:37 +0800601 mutex_unlock(&vcpu->kvm->lock);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800602}
603EXPORT_SYMBOL_GPL(set_cr3);
604
605void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
606{
Rusty Russell7075bc82007-07-17 23:37:17 +1000607 if (cr8 & CR8_RESERVED_BITS) {
Avi Kivity6aa8b732006-12-10 02:21:36 -0800608 printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
609 inject_gp(vcpu);
610 return;
611 }
612 vcpu->cr8 = cr8;
613}
614EXPORT_SYMBOL_GPL(set_cr8);
615
616void fx_init(struct kvm_vcpu *vcpu)
617{
618 struct __attribute__ ((__packed__)) fx_image_s {
619 u16 control; //fcw
620 u16 status; //fsw
621 u16 tag; // ftw
622 u16 opcode; //fop
623 u64 ip; // fpu ip
624 u64 operand;// fpu dp
625 u32 mxcsr;
626 u32 mxcsr_mask;
627
628 } *fx_image;
629
Rusty Russell9bd01502007-07-30 16:29:56 +1000630 /* Initialize guest FPU by resetting ours and saving into guest's */
631 preempt_disable();
Avi Kivity6aa8b732006-12-10 02:21:36 -0800632 fx_save(vcpu->host_fx_image);
633 fpu_init();
634 fx_save(vcpu->guest_fx_image);
635 fx_restore(vcpu->host_fx_image);
Rusty Russell9bd01502007-07-30 16:29:56 +1000636 preempt_enable();
Avi Kivity6aa8b732006-12-10 02:21:36 -0800637
638 fx_image = (struct fx_image_s *)vcpu->guest_fx_image;
639 fx_image->mxcsr = 0x1f80;
640 memset(vcpu->guest_fx_image + sizeof(struct fx_image_s),
641 0, FX_IMAGE_SIZE - sizeof(struct fx_image_s));
642}
643EXPORT_SYMBOL_GPL(fx_init);
644
645/*
Avi Kivity6aa8b732006-12-10 02:21:36 -0800646 * Allocate some memory and give it an address in the guest physical address
647 * space.
648 *
649 * Discontiguous memory is allowed, mostly for framebuffers.
650 */
Avi Kivity2c6f5df2007-02-20 18:27:58 +0200651static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
652 struct kvm_memory_region *mem)
Avi Kivity6aa8b732006-12-10 02:21:36 -0800653{
654 int r;
655 gfn_t base_gfn;
656 unsigned long npages;
657 unsigned long i;
658 struct kvm_memory_slot *memslot;
659 struct kvm_memory_slot old, new;
660 int memory_config_version;
661
662 r = -EINVAL;
663 /* General sanity checks */
664 if (mem->memory_size & (PAGE_SIZE - 1))
665 goto out;
666 if (mem->guest_phys_addr & (PAGE_SIZE - 1))
667 goto out;
668 if (mem->slot >= KVM_MEMORY_SLOTS)
669 goto out;
670 if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
671 goto out;
672
673 memslot = &kvm->memslots[mem->slot];
674 base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
675 npages = mem->memory_size >> PAGE_SHIFT;
676
677 if (!npages)
678 mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
679
680raced:
Shaohua Li11ec2802007-07-23 14:51:37 +0800681 mutex_lock(&kvm->lock);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800682
683 memory_config_version = kvm->memory_config_version;
684 new = old = *memslot;
685
686 new.base_gfn = base_gfn;
687 new.npages = npages;
688 new.flags = mem->flags;
689
690 /* Disallow changing a memory slot's size. */
691 r = -EINVAL;
692 if (npages && old.npages && npages != old.npages)
693 goto out_unlock;
694
695 /* Check for overlaps */
696 r = -EEXIST;
697 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
698 struct kvm_memory_slot *s = &kvm->memslots[i];
699
700 if (s == memslot)
701 continue;
702 if (!((base_gfn + npages <= s->base_gfn) ||
703 (base_gfn >= s->base_gfn + s->npages)))
704 goto out_unlock;
705 }
706 /*
707 * Do memory allocations outside lock. memory_config_version will
708 * detect any races.
709 */
Shaohua Li11ec2802007-07-23 14:51:37 +0800710 mutex_unlock(&kvm->lock);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800711
712 /* Deallocate if slot is being removed */
713 if (!npages)
Al Viro8b6d44c2007-02-09 16:38:40 +0000714 new.phys_mem = NULL;
Avi Kivity6aa8b732006-12-10 02:21:36 -0800715
716 /* Free page dirty bitmap if unneeded */
717 if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
Al Viro8b6d44c2007-02-09 16:38:40 +0000718 new.dirty_bitmap = NULL;
Avi Kivity6aa8b732006-12-10 02:21:36 -0800719
720 r = -ENOMEM;
721
722 /* Allocate if a slot is being created */
723 if (npages && !new.phys_mem) {
724 new.phys_mem = vmalloc(npages * sizeof(struct page *));
725
726 if (!new.phys_mem)
727 goto out_free;
728
729 memset(new.phys_mem, 0, npages * sizeof(struct page *));
730 for (i = 0; i < npages; ++i) {
731 new.phys_mem[i] = alloc_page(GFP_HIGHUSER
732 | __GFP_ZERO);
733 if (!new.phys_mem[i])
734 goto out_free;
Markus Rechberger5972e952007-02-19 14:37:47 +0200735 set_page_private(new.phys_mem[i],0);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800736 }
737 }
738
739 /* Allocate page dirty bitmap if needed */
740 if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
741 unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8;
742
743 new.dirty_bitmap = vmalloc(dirty_bytes);
744 if (!new.dirty_bitmap)
745 goto out_free;
746 memset(new.dirty_bitmap, 0, dirty_bytes);
747 }
748
Shaohua Li11ec2802007-07-23 14:51:37 +0800749 mutex_lock(&kvm->lock);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800750
751 if (memory_config_version != kvm->memory_config_version) {
Shaohua Li11ec2802007-07-23 14:51:37 +0800752 mutex_unlock(&kvm->lock);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800753 kvm_free_physmem_slot(&new, &old);
754 goto raced;
755 }
756
757 r = -EAGAIN;
758 if (kvm->busy)
759 goto out_unlock;
760
761 if (mem->slot >= kvm->nmemslots)
762 kvm->nmemslots = mem->slot + 1;
763
764 *memslot = new;
765 ++kvm->memory_config_version;
766
Avi Kivity90cb0522007-07-17 13:04:56 +0300767 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
768 kvm_flush_remote_tlbs(kvm);
769
Shaohua Li11ec2802007-07-23 14:51:37 +0800770 mutex_unlock(&kvm->lock);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800771
Avi Kivity6aa8b732006-12-10 02:21:36 -0800772 kvm_free_physmem_slot(&old, &new);
773 return 0;
774
775out_unlock:
Shaohua Li11ec2802007-07-23 14:51:37 +0800776 mutex_unlock(&kvm->lock);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800777out_free:
778 kvm_free_physmem_slot(&new, &old);
779out:
780 return r;
781}
782
783/*
784 * Get (and clear) the dirty memory log for a memory slot.
785 */
Avi Kivity2c6f5df2007-02-20 18:27:58 +0200786static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
787 struct kvm_dirty_log *log)
Avi Kivity6aa8b732006-12-10 02:21:36 -0800788{
789 struct kvm_memory_slot *memslot;
790 int r, i;
791 int n;
792 unsigned long any = 0;
793
Shaohua Li11ec2802007-07-23 14:51:37 +0800794 mutex_lock(&kvm->lock);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800795
796 /*
797 * Prevent changes to guest memory configuration even while the lock
798 * is not taken.
799 */
800 ++kvm->busy;
Shaohua Li11ec2802007-07-23 14:51:37 +0800801 mutex_unlock(&kvm->lock);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800802 r = -EINVAL;
803 if (log->slot >= KVM_MEMORY_SLOTS)
804 goto out;
805
806 memslot = &kvm->memslots[log->slot];
807 r = -ENOENT;
808 if (!memslot->dirty_bitmap)
809 goto out;
810
Uri Lublincd1a4a92007-02-22 16:43:09 +0200811 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
Avi Kivity6aa8b732006-12-10 02:21:36 -0800812
Uri Lublincd1a4a92007-02-22 16:43:09 +0200813 for (i = 0; !any && i < n/sizeof(long); ++i)
Avi Kivity6aa8b732006-12-10 02:21:36 -0800814 any = memslot->dirty_bitmap[i];
815
816 r = -EFAULT;
817 if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
818 goto out;
819
Shaohua Li11ec2802007-07-23 14:51:37 +0800820 mutex_lock(&kvm->lock);
Avi Kivity90cb0522007-07-17 13:04:56 +0300821 kvm_mmu_slot_remove_write_access(kvm, log->slot);
822 kvm_flush_remote_tlbs(kvm);
823 memset(memslot->dirty_bitmap, 0, n);
Shaohua Li11ec2802007-07-23 14:51:37 +0800824 mutex_unlock(&kvm->lock);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800825
826 r = 0;
827
828out:
Shaohua Li11ec2802007-07-23 14:51:37 +0800829 mutex_lock(&kvm->lock);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800830 --kvm->busy;
Shaohua Li11ec2802007-07-23 14:51:37 +0800831 mutex_unlock(&kvm->lock);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800832 return r;
833}
834
Avi Kivitye8207542007-03-30 16:54:30 +0300835/*
836 * Set a new alias region. Aliases map a portion of physical memory into
837 * another portion. This is useful for memory windows, for example the PC
838 * VGA region.
839 */
840static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
841 struct kvm_memory_alias *alias)
842{
843 int r, n;
844 struct kvm_mem_alias *p;
845
846 r = -EINVAL;
847 /* General sanity checks */
848 if (alias->memory_size & (PAGE_SIZE - 1))
849 goto out;
850 if (alias->guest_phys_addr & (PAGE_SIZE - 1))
851 goto out;
852 if (alias->slot >= KVM_ALIAS_SLOTS)
853 goto out;
854 if (alias->guest_phys_addr + alias->memory_size
855 < alias->guest_phys_addr)
856 goto out;
857 if (alias->target_phys_addr + alias->memory_size
858 < alias->target_phys_addr)
859 goto out;
860
Shaohua Li11ec2802007-07-23 14:51:37 +0800861 mutex_lock(&kvm->lock);
Avi Kivitye8207542007-03-30 16:54:30 +0300862
863 p = &kvm->aliases[alias->slot];
864 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
865 p->npages = alias->memory_size >> PAGE_SHIFT;
866 p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
867
868 for (n = KVM_ALIAS_SLOTS; n > 0; --n)
869 if (kvm->aliases[n - 1].npages)
870 break;
871 kvm->naliases = n;
872
Avi Kivity90cb0522007-07-17 13:04:56 +0300873 kvm_mmu_zap_all(kvm);
Avi Kivitye8207542007-03-30 16:54:30 +0300874
Shaohua Li11ec2802007-07-23 14:51:37 +0800875 mutex_unlock(&kvm->lock);
Avi Kivitye8207542007-03-30 16:54:30 +0300876
877 return 0;
878
879out:
880 return r;
881}
882
883static gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
884{
885 int i;
886 struct kvm_mem_alias *alias;
887
888 for (i = 0; i < kvm->naliases; ++i) {
889 alias = &kvm->aliases[i];
890 if (gfn >= alias->base_gfn
891 && gfn < alias->base_gfn + alias->npages)
892 return alias->target_gfn + gfn - alias->base_gfn;
893 }
894 return gfn;
895}
896
897static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
Avi Kivity6aa8b732006-12-10 02:21:36 -0800898{
899 int i;
900
901 for (i = 0; i < kvm->nmemslots; ++i) {
902 struct kvm_memory_slot *memslot = &kvm->memslots[i];
903
904 if (gfn >= memslot->base_gfn
905 && gfn < memslot->base_gfn + memslot->npages)
906 return memslot;
907 }
Al Viro8b6d44c2007-02-09 16:38:40 +0000908 return NULL;
Avi Kivity6aa8b732006-12-10 02:21:36 -0800909}
Avi Kivitye8207542007-03-30 16:54:30 +0300910
911struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
912{
913 gfn = unalias_gfn(kvm, gfn);
914 return __gfn_to_memslot(kvm, gfn);
915}
Avi Kivity6aa8b732006-12-10 02:21:36 -0800916
Avi Kivity954bbbc22007-03-30 14:02:32 +0300917struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
918{
919 struct kvm_memory_slot *slot;
920
Avi Kivitye8207542007-03-30 16:54:30 +0300921 gfn = unalias_gfn(kvm, gfn);
922 slot = __gfn_to_memslot(kvm, gfn);
Avi Kivity954bbbc22007-03-30 14:02:32 +0300923 if (!slot)
924 return NULL;
925 return slot->phys_mem[gfn - slot->base_gfn];
926}
927EXPORT_SYMBOL_GPL(gfn_to_page);
928
Avi Kivity6aa8b732006-12-10 02:21:36 -0800929void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
930{
931 int i;
Nguyen Anh Quynh31389942007-06-05 10:35:19 +0300932 struct kvm_memory_slot *memslot;
Avi Kivity6aa8b732006-12-10 02:21:36 -0800933 unsigned long rel_gfn;
934
935 for (i = 0; i < kvm->nmemslots; ++i) {
936 memslot = &kvm->memslots[i];
937
938 if (gfn >= memslot->base_gfn
939 && gfn < memslot->base_gfn + memslot->npages) {
940
Nguyen Anh Quynh31389942007-06-05 10:35:19 +0300941 if (!memslot->dirty_bitmap)
Avi Kivity6aa8b732006-12-10 02:21:36 -0800942 return;
943
944 rel_gfn = gfn - memslot->base_gfn;
945
946 /* avoid RMW */
947 if (!test_bit(rel_gfn, memslot->dirty_bitmap))
948 set_bit(rel_gfn, memslot->dirty_bitmap);
949 return;
950 }
951 }
952}
953
Laurent Viviere7d5d762007-07-30 13:41:19 +0300954int emulator_read_std(unsigned long addr,
Avi Kivity4c690a12007-04-22 15:28:19 +0300955 void *val,
Avi Kivity6aa8b732006-12-10 02:21:36 -0800956 unsigned int bytes,
Laurent Viviercebff022007-07-30 13:35:24 +0300957 struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -0800958{
Avi Kivity6aa8b732006-12-10 02:21:36 -0800959 void *data = val;
960
961 while (bytes) {
962 gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
963 unsigned offset = addr & (PAGE_SIZE-1);
964 unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);
965 unsigned long pfn;
Avi Kivity954bbbc22007-03-30 14:02:32 +0300966 struct page *page;
967 void *page_virt;
Avi Kivity6aa8b732006-12-10 02:21:36 -0800968
969 if (gpa == UNMAPPED_GVA)
970 return X86EMUL_PROPAGATE_FAULT;
971 pfn = gpa >> PAGE_SHIFT;
Avi Kivity954bbbc22007-03-30 14:02:32 +0300972 page = gfn_to_page(vcpu->kvm, pfn);
973 if (!page)
Avi Kivity6aa8b732006-12-10 02:21:36 -0800974 return X86EMUL_UNHANDLEABLE;
Avi Kivity954bbbc22007-03-30 14:02:32 +0300975 page_virt = kmap_atomic(page, KM_USER0);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800976
Avi Kivity954bbbc22007-03-30 14:02:32 +0300977 memcpy(data, page_virt + offset, tocopy);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800978
Avi Kivity954bbbc22007-03-30 14:02:32 +0300979 kunmap_atomic(page_virt, KM_USER0);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800980
981 bytes -= tocopy;
982 data += tocopy;
983 addr += tocopy;
984 }
985
986 return X86EMUL_CONTINUE;
987}
Laurent Viviere7d5d762007-07-30 13:41:19 +0300988EXPORT_SYMBOL_GPL(emulator_read_std);
Avi Kivity6aa8b732006-12-10 02:21:36 -0800989
990static int emulator_write_std(unsigned long addr,
Avi Kivity4c690a12007-04-22 15:28:19 +0300991 const void *val,
Avi Kivity6aa8b732006-12-10 02:21:36 -0800992 unsigned int bytes,
Laurent Viviercebff022007-07-30 13:35:24 +0300993 struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -0800994{
995 printk(KERN_ERR "emulator_write_std: addr %lx n %d\n",
996 addr, bytes);
997 return X86EMUL_UNHANDLEABLE;
998}
999
Gregory Haskins2eeb2e92007-05-31 14:08:53 -04001000static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
1001 gpa_t addr)
1002{
1003 /*
1004 * Note that its important to have this wrapper function because
1005 * in the very near future we will be checking for MMIOs against
1006 * the LAPIC as well as the general MMIO bus
1007 */
1008 return kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
1009}
1010
Eddie Dong74906342007-06-19 18:05:03 +03001011static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
1012 gpa_t addr)
1013{
1014 return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr);
1015}
1016
Avi Kivity6aa8b732006-12-10 02:21:36 -08001017static int emulator_read_emulated(unsigned long addr,
Avi Kivity4c690a12007-04-22 15:28:19 +03001018 void *val,
Avi Kivity6aa8b732006-12-10 02:21:36 -08001019 unsigned int bytes,
Laurent Viviercebff022007-07-30 13:35:24 +03001020 struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001021{
Gregory Haskins2eeb2e92007-05-31 14:08:53 -04001022 struct kvm_io_device *mmio_dev;
1023 gpa_t gpa;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001024
1025 if (vcpu->mmio_read_completed) {
1026 memcpy(val, vcpu->mmio_data, bytes);
1027 vcpu->mmio_read_completed = 0;
1028 return X86EMUL_CONTINUE;
Laurent Viviercebff022007-07-30 13:35:24 +03001029 } else if (emulator_read_std(addr, val, bytes, vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001030 == X86EMUL_CONTINUE)
1031 return X86EMUL_CONTINUE;
Avi Kivityd27d4ac2007-02-19 14:37:46 +02001032
Gregory Haskins2eeb2e92007-05-31 14:08:53 -04001033 gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
1034 if (gpa == UNMAPPED_GVA)
1035 return X86EMUL_PROPAGATE_FAULT;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001036
Gregory Haskins2eeb2e92007-05-31 14:08:53 -04001037 /*
1038 * Is this MMIO handled locally?
1039 */
1040 mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
1041 if (mmio_dev) {
1042 kvm_iodevice_read(mmio_dev, gpa, bytes, val);
1043 return X86EMUL_CONTINUE;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001044 }
Gregory Haskins2eeb2e92007-05-31 14:08:53 -04001045
1046 vcpu->mmio_needed = 1;
1047 vcpu->mmio_phys_addr = gpa;
1048 vcpu->mmio_size = bytes;
1049 vcpu->mmio_is_write = 0;
1050
1051 return X86EMUL_UNHANDLEABLE;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001052}
1053
Avi Kivityda4a00f2007-01-05 16:36:44 -08001054static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
Avi Kivity4c690a12007-04-22 15:28:19 +03001055 const void *val, int bytes)
Avi Kivityda4a00f2007-01-05 16:36:44 -08001056{
Avi Kivityda4a00f2007-01-05 16:36:44 -08001057 struct page *page;
1058 void *virt;
1059
1060 if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT))
1061 return 0;
Avi Kivity954bbbc22007-03-30 14:02:32 +03001062 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
1063 if (!page)
Avi Kivityda4a00f2007-01-05 16:36:44 -08001064 return 0;
Uri Lublinab51a432007-02-21 18:25:21 +02001065 mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
Avi Kivityda4a00f2007-01-05 16:36:44 -08001066 virt = kmap_atomic(page, KM_USER0);
Shaohua Life551882007-07-23 14:51:39 +08001067 kvm_mmu_pte_write(vcpu, gpa, val, bytes);
Avi Kivity7cfa4b02007-07-23 18:33:14 +03001068 memcpy(virt + offset_in_page(gpa), val, bytes);
Avi Kivityda4a00f2007-01-05 16:36:44 -08001069 kunmap_atomic(virt, KM_USER0);
Avi Kivityda4a00f2007-01-05 16:36:44 -08001070 return 1;
1071}
1072
Avi Kivityb0fcd902007-07-22 18:48:54 +03001073static int emulator_write_emulated_onepage(unsigned long addr,
1074 const void *val,
1075 unsigned int bytes,
Laurent Viviercebff022007-07-30 13:35:24 +03001076 struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001077{
Gregory Haskins2eeb2e92007-05-31 14:08:53 -04001078 struct kvm_io_device *mmio_dev;
1079 gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001080
Avi Kivityc9047f52007-04-17 10:53:22 +03001081 if (gpa == UNMAPPED_GVA) {
1082 kvm_arch_ops->inject_page_fault(vcpu, addr, 2);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001083 return X86EMUL_PROPAGATE_FAULT;
Avi Kivityc9047f52007-04-17 10:53:22 +03001084 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08001085
Avi Kivityda4a00f2007-01-05 16:36:44 -08001086 if (emulator_write_phys(vcpu, gpa, val, bytes))
1087 return X86EMUL_CONTINUE;
1088
Gregory Haskins2eeb2e92007-05-31 14:08:53 -04001089 /*
1090 * Is this MMIO handled locally?
1091 */
1092 mmio_dev = vcpu_find_mmio_dev(vcpu, gpa);
1093 if (mmio_dev) {
1094 kvm_iodevice_write(mmio_dev, gpa, bytes, val);
1095 return X86EMUL_CONTINUE;
1096 }
1097
Avi Kivity6aa8b732006-12-10 02:21:36 -08001098 vcpu->mmio_needed = 1;
1099 vcpu->mmio_phys_addr = gpa;
1100 vcpu->mmio_size = bytes;
1101 vcpu->mmio_is_write = 1;
Avi Kivity4c690a12007-04-22 15:28:19 +03001102 memcpy(vcpu->mmio_data, val, bytes);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001103
1104 return X86EMUL_CONTINUE;
1105}
1106
Laurent Viviere7d5d762007-07-30 13:41:19 +03001107int emulator_write_emulated(unsigned long addr,
Avi Kivityb0fcd902007-07-22 18:48:54 +03001108 const void *val,
1109 unsigned int bytes,
Laurent Viviercebff022007-07-30 13:35:24 +03001110 struct kvm_vcpu *vcpu)
Avi Kivityb0fcd902007-07-22 18:48:54 +03001111{
1112 /* Crossing a page boundary? */
1113 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
1114 int rc, now;
1115
1116 now = -addr & ~PAGE_MASK;
Laurent Viviercebff022007-07-30 13:35:24 +03001117 rc = emulator_write_emulated_onepage(addr, val, now, vcpu);
Avi Kivityb0fcd902007-07-22 18:48:54 +03001118 if (rc != X86EMUL_CONTINUE)
1119 return rc;
1120 addr += now;
1121 val += now;
1122 bytes -= now;
1123 }
Laurent Viviercebff022007-07-30 13:35:24 +03001124 return emulator_write_emulated_onepage(addr, val, bytes, vcpu);
Avi Kivityb0fcd902007-07-22 18:48:54 +03001125}
Laurent Viviere7d5d762007-07-30 13:41:19 +03001126EXPORT_SYMBOL_GPL(emulator_write_emulated);
Avi Kivityb0fcd902007-07-22 18:48:54 +03001127
Avi Kivity6aa8b732006-12-10 02:21:36 -08001128static int emulator_cmpxchg_emulated(unsigned long addr,
Avi Kivity4c690a12007-04-22 15:28:19 +03001129 const void *old,
1130 const void *new,
Avi Kivity6aa8b732006-12-10 02:21:36 -08001131 unsigned int bytes,
Laurent Viviercebff022007-07-30 13:35:24 +03001132 struct kvm_vcpu *vcpu)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001133{
1134 static int reported;
1135
1136 if (!reported) {
1137 reported = 1;
1138 printk(KERN_WARNING "kvm: emulating exchange as write\n");
1139 }
Laurent Viviercebff022007-07-30 13:35:24 +03001140 return emulator_write_emulated(addr, new, bytes, vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001141}
1142
1143static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
1144{
1145 return kvm_arch_ops->get_segment_base(vcpu, seg);
1146}
1147
1148int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
1149{
Avi Kivity6aa8b732006-12-10 02:21:36 -08001150 return X86EMUL_CONTINUE;
1151}
1152
1153int emulate_clts(struct kvm_vcpu *vcpu)
1154{
Avi Kivity399badf2007-01-05 16:36:38 -08001155 unsigned long cr0;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001156
Rusty Russell707d92f2007-07-17 23:19:08 +10001157 cr0 = vcpu->cr0 & ~X86_CR0_TS;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001158 kvm_arch_ops->set_cr0(vcpu, cr0);
1159 return X86EMUL_CONTINUE;
1160}
1161
1162int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, unsigned long *dest)
1163{
1164 struct kvm_vcpu *vcpu = ctxt->vcpu;
1165
1166 switch (dr) {
1167 case 0 ... 3:
1168 *dest = kvm_arch_ops->get_dr(vcpu, dr);
1169 return X86EMUL_CONTINUE;
1170 default:
1171 printk(KERN_DEBUG "%s: unexpected dr %u\n",
1172 __FUNCTION__, dr);
1173 return X86EMUL_UNHANDLEABLE;
1174 }
1175}
1176
1177int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
1178{
1179 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
1180 int exception;
1181
1182 kvm_arch_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception);
1183 if (exception) {
1184 /* FIXME: better handling */
1185 return X86EMUL_UNHANDLEABLE;
1186 }
1187 return X86EMUL_CONTINUE;
1188}
1189
1190static void report_emulation_failure(struct x86_emulate_ctxt *ctxt)
1191{
1192 static int reported;
1193 u8 opcodes[4];
1194 unsigned long rip = ctxt->vcpu->rip;
1195 unsigned long rip_linear;
1196
1197 rip_linear = rip + get_segment_base(ctxt->vcpu, VCPU_SREG_CS);
1198
1199 if (reported)
1200 return;
1201
Laurent Viviercebff022007-07-30 13:35:24 +03001202 emulator_read_std(rip_linear, (void *)opcodes, 4, ctxt->vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001203
1204 printk(KERN_ERR "emulation failed but !mmio_needed?"
1205 " rip %lx %02x %02x %02x %02x\n",
1206 rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
1207 reported = 1;
1208}
1209
1210struct x86_emulate_ops emulate_ops = {
1211 .read_std = emulator_read_std,
1212 .write_std = emulator_write_std,
1213 .read_emulated = emulator_read_emulated,
1214 .write_emulated = emulator_write_emulated,
1215 .cmpxchg_emulated = emulator_cmpxchg_emulated,
1216};
1217
1218int emulate_instruction(struct kvm_vcpu *vcpu,
1219 struct kvm_run *run,
1220 unsigned long cr2,
1221 u16 error_code)
1222{
1223 struct x86_emulate_ctxt emulate_ctxt;
1224 int r;
1225 int cs_db, cs_l;
1226
Avi Kivitye7df56e2007-03-14 15:54:54 +02001227 vcpu->mmio_fault_cr2 = cr2;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001228 kvm_arch_ops->cache_regs(vcpu);
1229
1230 kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
1231
1232 emulate_ctxt.vcpu = vcpu;
1233 emulate_ctxt.eflags = kvm_arch_ops->get_rflags(vcpu);
1234 emulate_ctxt.cr2 = cr2;
1235 emulate_ctxt.mode = (emulate_ctxt.eflags & X86_EFLAGS_VM)
1236 ? X86EMUL_MODE_REAL : cs_l
1237 ? X86EMUL_MODE_PROT64 : cs_db
1238 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
1239
1240 if (emulate_ctxt.mode == X86EMUL_MODE_PROT64) {
1241 emulate_ctxt.cs_base = 0;
1242 emulate_ctxt.ds_base = 0;
1243 emulate_ctxt.es_base = 0;
1244 emulate_ctxt.ss_base = 0;
1245 } else {
1246 emulate_ctxt.cs_base = get_segment_base(vcpu, VCPU_SREG_CS);
1247 emulate_ctxt.ds_base = get_segment_base(vcpu, VCPU_SREG_DS);
1248 emulate_ctxt.es_base = get_segment_base(vcpu, VCPU_SREG_ES);
1249 emulate_ctxt.ss_base = get_segment_base(vcpu, VCPU_SREG_SS);
1250 }
1251
1252 emulate_ctxt.gs_base = get_segment_base(vcpu, VCPU_SREG_GS);
1253 emulate_ctxt.fs_base = get_segment_base(vcpu, VCPU_SREG_FS);
1254
1255 vcpu->mmio_is_write = 0;
1256 r = x86_emulate_memop(&emulate_ctxt, &emulate_ops);
1257
1258 if ((r || vcpu->mmio_is_write) && run) {
Jeff Dike8fc0d082007-07-17 12:26:59 -04001259 run->exit_reason = KVM_EXIT_MMIO;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001260 run->mmio.phys_addr = vcpu->mmio_phys_addr;
1261 memcpy(run->mmio.data, vcpu->mmio_data, 8);
1262 run->mmio.len = vcpu->mmio_size;
1263 run->mmio.is_write = vcpu->mmio_is_write;
1264 }
1265
1266 if (r) {
Avi Kivitya4360362007-01-05 16:36:45 -08001267 if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
1268 return EMULATE_DONE;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001269 if (!vcpu->mmio_needed) {
1270 report_emulation_failure(&emulate_ctxt);
1271 return EMULATE_FAIL;
1272 }
1273 return EMULATE_DO_MMIO;
1274 }
1275
1276 kvm_arch_ops->decache_regs(vcpu);
1277 kvm_arch_ops->set_rflags(vcpu, emulate_ctxt.eflags);
1278
Avi Kivity02c83202007-04-29 15:02:17 +03001279 if (vcpu->mmio_is_write) {
1280 vcpu->mmio_needed = 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001281 return EMULATE_DO_MMIO;
Avi Kivity02c83202007-04-29 15:02:17 +03001282 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08001283
1284 return EMULATE_DONE;
1285}
1286EXPORT_SYMBOL_GPL(emulate_instruction);
1287
Avi Kivityd3bef152007-06-05 15:53:05 +03001288int kvm_emulate_halt(struct kvm_vcpu *vcpu)
1289{
1290 if (vcpu->irq_summary)
1291 return 1;
1292
1293 vcpu->run->exit_reason = KVM_EXIT_HLT;
1294 ++vcpu->stat.halt_exits;
1295 return 0;
1296}
1297EXPORT_SYMBOL_GPL(kvm_emulate_halt);
1298
Avi Kivity270fd9b2007-02-19 14:37:47 +02001299int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
1300{
1301 unsigned long nr, a0, a1, a2, a3, a4, a5, ret;
1302
Dor Laor9b22bf52007-02-19 16:44:49 +02001303 kvm_arch_ops->cache_regs(vcpu);
Avi Kivity270fd9b2007-02-19 14:37:47 +02001304 ret = -KVM_EINVAL;
1305#ifdef CONFIG_X86_64
1306 if (is_long_mode(vcpu)) {
1307 nr = vcpu->regs[VCPU_REGS_RAX];
1308 a0 = vcpu->regs[VCPU_REGS_RDI];
1309 a1 = vcpu->regs[VCPU_REGS_RSI];
1310 a2 = vcpu->regs[VCPU_REGS_RDX];
1311 a3 = vcpu->regs[VCPU_REGS_RCX];
1312 a4 = vcpu->regs[VCPU_REGS_R8];
1313 a5 = vcpu->regs[VCPU_REGS_R9];
1314 } else
1315#endif
1316 {
1317 nr = vcpu->regs[VCPU_REGS_RBX] & -1u;
1318 a0 = vcpu->regs[VCPU_REGS_RAX] & -1u;
1319 a1 = vcpu->regs[VCPU_REGS_RCX] & -1u;
1320 a2 = vcpu->regs[VCPU_REGS_RDX] & -1u;
1321 a3 = vcpu->regs[VCPU_REGS_RSI] & -1u;
1322 a4 = vcpu->regs[VCPU_REGS_RDI] & -1u;
1323 a5 = vcpu->regs[VCPU_REGS_RBP] & -1u;
1324 }
1325 switch (nr) {
1326 default:
Jeff Dike519ef352007-07-16 15:24:47 -04001327 run->hypercall.nr = nr;
Avi Kivityb4e63f52007-03-04 13:59:30 +02001328 run->hypercall.args[0] = a0;
1329 run->hypercall.args[1] = a1;
1330 run->hypercall.args[2] = a2;
1331 run->hypercall.args[3] = a3;
1332 run->hypercall.args[4] = a4;
1333 run->hypercall.args[5] = a5;
1334 run->hypercall.ret = ret;
1335 run->hypercall.longmode = is_long_mode(vcpu);
1336 kvm_arch_ops->decache_regs(vcpu);
1337 return 0;
Avi Kivity270fd9b2007-02-19 14:37:47 +02001338 }
1339 vcpu->regs[VCPU_REGS_RAX] = ret;
Dor Laor9b22bf52007-02-19 16:44:49 +02001340 kvm_arch_ops->decache_regs(vcpu);
Avi Kivity270fd9b2007-02-19 14:37:47 +02001341 return 1;
1342}
1343EXPORT_SYMBOL_GPL(kvm_hypercall);
1344
Avi Kivity6aa8b732006-12-10 02:21:36 -08001345static u64 mk_cr_64(u64 curr_cr, u32 new_val)
1346{
1347 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
1348}
1349
1350void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
1351{
1352 struct descriptor_table dt = { limit, base };
1353
1354 kvm_arch_ops->set_gdt(vcpu, &dt);
1355}
1356
1357void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
1358{
1359 struct descriptor_table dt = { limit, base };
1360
1361 kvm_arch_ops->set_idt(vcpu, &dt);
1362}
1363
1364void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
1365 unsigned long *rflags)
1366{
1367 lmsw(vcpu, msw);
1368 *rflags = kvm_arch_ops->get_rflags(vcpu);
1369}
1370
1371unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
1372{
Anthony Liguori25c4c272007-04-27 09:29:21 +03001373 kvm_arch_ops->decache_cr4_guest_bits(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001374 switch (cr) {
1375 case 0:
1376 return vcpu->cr0;
1377 case 2:
1378 return vcpu->cr2;
1379 case 3:
1380 return vcpu->cr3;
1381 case 4:
1382 return vcpu->cr4;
1383 default:
1384 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
1385 return 0;
1386 }
1387}
1388
1389void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
1390 unsigned long *rflags)
1391{
1392 switch (cr) {
1393 case 0:
1394 set_cr0(vcpu, mk_cr_64(vcpu->cr0, val));
1395 *rflags = kvm_arch_ops->get_rflags(vcpu);
1396 break;
1397 case 2:
1398 vcpu->cr2 = val;
1399 break;
1400 case 3:
1401 set_cr3(vcpu, val);
1402 break;
1403 case 4:
1404 set_cr4(vcpu, mk_cr_64(vcpu->cr4, val));
1405 break;
1406 default:
1407 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
1408 }
1409}
1410
Ingo Molnar102d8322007-02-19 14:37:47 +02001411/*
1412 * Register the para guest with the host:
1413 */
1414static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa)
1415{
1416 struct kvm_vcpu_para_state *para_state;
1417 hpa_t para_state_hpa, hypercall_hpa;
1418 struct page *para_state_page;
1419 unsigned char *hypercall;
1420 gpa_t hypercall_gpa;
1421
1422 printk(KERN_DEBUG "kvm: guest trying to enter paravirtual mode\n");
1423 printk(KERN_DEBUG ".... para_state_gpa: %08Lx\n", para_state_gpa);
1424
1425 /*
1426 * Needs to be page aligned:
1427 */
1428 if (para_state_gpa != PAGE_ALIGN(para_state_gpa))
1429 goto err_gp;
1430
1431 para_state_hpa = gpa_to_hpa(vcpu, para_state_gpa);
1432 printk(KERN_DEBUG ".... para_state_hpa: %08Lx\n", para_state_hpa);
1433 if (is_error_hpa(para_state_hpa))
1434 goto err_gp;
1435
Uri Lublinab51a432007-02-21 18:25:21 +02001436 mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT);
Ingo Molnar102d8322007-02-19 14:37:47 +02001437 para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT);
Shaohua Life551882007-07-23 14:51:39 +08001438 para_state = kmap(para_state_page);
Ingo Molnar102d8322007-02-19 14:37:47 +02001439
1440 printk(KERN_DEBUG ".... guest version: %d\n", para_state->guest_version);
1441 printk(KERN_DEBUG ".... size: %d\n", para_state->size);
1442
1443 para_state->host_version = KVM_PARA_API_VERSION;
1444 /*
1445 * We cannot support guests that try to register themselves
1446 * with a newer API version than the host supports:
1447 */
1448 if (para_state->guest_version > KVM_PARA_API_VERSION) {
1449 para_state->ret = -KVM_EINVAL;
1450 goto err_kunmap_skip;
1451 }
1452
1453 hypercall_gpa = para_state->hypercall_gpa;
1454 hypercall_hpa = gpa_to_hpa(vcpu, hypercall_gpa);
1455 printk(KERN_DEBUG ".... hypercall_hpa: %08Lx\n", hypercall_hpa);
1456 if (is_error_hpa(hypercall_hpa)) {
1457 para_state->ret = -KVM_EINVAL;
1458 goto err_kunmap_skip;
1459 }
1460
1461 printk(KERN_DEBUG "kvm: para guest successfully registered.\n");
1462 vcpu->para_state_page = para_state_page;
1463 vcpu->para_state_gpa = para_state_gpa;
1464 vcpu->hypercall_gpa = hypercall_gpa;
1465
Uri Lublinab51a432007-02-21 18:25:21 +02001466 mark_page_dirty(vcpu->kvm, hypercall_gpa >> PAGE_SHIFT);
Ingo Molnar102d8322007-02-19 14:37:47 +02001467 hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT),
1468 KM_USER1) + (hypercall_hpa & ~PAGE_MASK);
1469 kvm_arch_ops->patch_hypercall(vcpu, hypercall);
1470 kunmap_atomic(hypercall, KM_USER1);
1471
1472 para_state->ret = 0;
1473err_kunmap_skip:
Shaohua Life551882007-07-23 14:51:39 +08001474 kunmap(para_state_page);
Ingo Molnar102d8322007-02-19 14:37:47 +02001475 return 0;
1476err_gp:
1477 return 1;
1478}
1479
Avi Kivity3bab1f52006-12-29 16:49:48 -08001480int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1481{
1482 u64 data;
1483
1484 switch (msr) {
1485 case 0xc0010010: /* SYSCFG */
1486 case 0xc0010015: /* HWCR */
1487 case MSR_IA32_PLATFORM_ID:
1488 case MSR_IA32_P5_MC_ADDR:
1489 case MSR_IA32_P5_MC_TYPE:
1490 case MSR_IA32_MC0_CTL:
1491 case MSR_IA32_MCG_STATUS:
1492 case MSR_IA32_MCG_CAP:
1493 case MSR_IA32_MC0_MISC:
1494 case MSR_IA32_MC0_MISC+4:
1495 case MSR_IA32_MC0_MISC+8:
1496 case MSR_IA32_MC0_MISC+12:
1497 case MSR_IA32_MC0_MISC+16:
1498 case MSR_IA32_UCODE_REV:
Avi Kivitya8d13ea2006-12-29 16:49:51 -08001499 case MSR_IA32_PERF_STATUS:
Matthew Gregan2dc70942007-05-06 10:59:46 +03001500 case MSR_IA32_EBL_CR_POWERON:
Avi Kivity3bab1f52006-12-29 16:49:48 -08001501 /* MTRR registers */
1502 case 0xfe:
1503 case 0x200 ... 0x2ff:
1504 data = 0;
1505 break;
Avi Kivitya8d13ea2006-12-29 16:49:51 -08001506 case 0xcd: /* fsb frequency */
1507 data = 3;
1508 break;
Avi Kivity3bab1f52006-12-29 16:49:48 -08001509 case MSR_IA32_APICBASE:
1510 data = vcpu->apic_base;
1511 break;
Avi Kivity6f00e682007-01-26 00:56:40 -08001512 case MSR_IA32_MISC_ENABLE:
1513 data = vcpu->ia32_misc_enable_msr;
1514 break;
Avi Kivity3bab1f52006-12-29 16:49:48 -08001515#ifdef CONFIG_X86_64
1516 case MSR_EFER:
1517 data = vcpu->shadow_efer;
1518 break;
1519#endif
1520 default:
1521 printk(KERN_ERR "kvm: unhandled rdmsr: 0x%x\n", msr);
1522 return 1;
1523 }
1524 *pdata = data;
1525 return 0;
1526}
1527EXPORT_SYMBOL_GPL(kvm_get_msr_common);
1528
Avi Kivity6aa8b732006-12-10 02:21:36 -08001529/*
1530 * Reads an msr value (of 'msr_index') into 'pdata'.
1531 * Returns 0 on success, non-0 otherwise.
1532 * Assumes vcpu_load() was already called.
1533 */
Avi Kivity35f3f282007-07-17 14:20:30 +03001534int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001535{
1536 return kvm_arch_ops->get_msr(vcpu, msr_index, pdata);
1537}
1538
#ifdef CONFIG_X86_64

/*
 * Handle a guest write of @efer to the EFER MSR.
 *
 * The write is refused by injecting #GP when it sets a reserved bit or
 * when it would change EFER_LME while paging is enabled.  Otherwise the
 * requested value is handed to the arch backend, and the cached
 * shadow_efer takes the new value except for EFER_LMA, which is carried
 * over from the old cached value.
 */
static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
	if (efer & EFER_RESERVED_BITS) {
		printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
		       efer);
		inject_gp(vcpu);
		return;
	}

	if (is_paging(vcpu) && ((vcpu->shadow_efer ^ efer) & EFER_LME)) {
		printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
		inject_gp(vcpu);
		return;
	}

	kvm_arch_ops->set_efer(vcpu, efer);

	/* The guest write does not alter LMA in the cached copy. */
	vcpu->shadow_efer = (efer & ~EFER_LMA)
			    | (vcpu->shadow_efer & EFER_LMA);
}

#endif
1566
Avi Kivity3bab1f52006-12-29 16:49:48 -08001567int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1568{
1569 switch (msr) {
1570#ifdef CONFIG_X86_64
1571 case MSR_EFER:
1572 set_efer(vcpu, data);
1573 break;
1574#endif
1575 case MSR_IA32_MC0_STATUS:
1576 printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
1577 __FUNCTION__, data);
1578 break;
Sergey Kiselev0e5bf0d2007-03-22 14:06:18 +02001579 case MSR_IA32_MCG_STATUS:
1580 printk(KERN_WARNING "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
1581 __FUNCTION__, data);
1582 break;
Avi Kivity3bab1f52006-12-29 16:49:48 -08001583 case MSR_IA32_UCODE_REV:
1584 case MSR_IA32_UCODE_WRITE:
1585 case 0x200 ... 0x2ff: /* MTRRs */
1586 break;
1587 case MSR_IA32_APICBASE:
1588 vcpu->apic_base = data;
1589 break;
Avi Kivity6f00e682007-01-26 00:56:40 -08001590 case MSR_IA32_MISC_ENABLE:
1591 vcpu->ia32_misc_enable_msr = data;
1592 break;
Ingo Molnar102d8322007-02-19 14:37:47 +02001593 /*
1594 * This is the 'probe whether the host is KVM' logic:
1595 */
1596 case MSR_KVM_API_MAGIC:
1597 return vcpu_register_para(vcpu, data);
1598
Avi Kivity3bab1f52006-12-29 16:49:48 -08001599 default:
1600 printk(KERN_ERR "kvm: unhandled wrmsr: 0x%x\n", msr);
1601 return 1;
1602 }
1603 return 0;
1604}
1605EXPORT_SYMBOL_GPL(kvm_set_msr_common);
1606
Avi Kivity6aa8b732006-12-10 02:21:36 -08001607/*
1608 * Writes msr value into into the appropriate "register".
1609 * Returns 0 on success, non-0 otherwise.
1610 * Assumes vcpu_load() was already called.
1611 */
Avi Kivity35f3f282007-07-17 14:20:30 +03001612int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001613{
1614 return kvm_arch_ops->set_msr(vcpu, msr_index, data);
1615}
1616
/* Call cond_resched() when need_resched() says one is due. @vcpu is unused. */
void kvm_resched(struct kvm_vcpu *vcpu)
{
	if (need_resched())
		cond_resched();
}
EXPORT_SYMBOL_GPL(kvm_resched);
1624
Avi Kivity06465c52007-02-28 20:46:53 +02001625void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
1626{
1627 int i;
1628 u32 function;
1629 struct kvm_cpuid_entry *e, *best;
1630
1631 kvm_arch_ops->cache_regs(vcpu);
1632 function = vcpu->regs[VCPU_REGS_RAX];
1633 vcpu->regs[VCPU_REGS_RAX] = 0;
1634 vcpu->regs[VCPU_REGS_RBX] = 0;
1635 vcpu->regs[VCPU_REGS_RCX] = 0;
1636 vcpu->regs[VCPU_REGS_RDX] = 0;
1637 best = NULL;
1638 for (i = 0; i < vcpu->cpuid_nent; ++i) {
1639 e = &vcpu->cpuid_entries[i];
1640 if (e->function == function) {
1641 best = e;
1642 break;
1643 }
1644 /*
1645 * Both basic or both extended?
1646 */
1647 if (((e->function ^ function) & 0x80000000) == 0)
1648 if (!best || e->function > best->function)
1649 best = e;
1650 }
1651 if (best) {
1652 vcpu->regs[VCPU_REGS_RAX] = best->eax;
1653 vcpu->regs[VCPU_REGS_RBX] = best->ebx;
1654 vcpu->regs[VCPU_REGS_RCX] = best->ecx;
1655 vcpu->regs[VCPU_REGS_RDX] = best->edx;
1656 }
1657 kvm_arch_ops->decache_regs(vcpu);
1658 kvm_arch_ops->skip_emulated_instruction(vcpu);
1659}
1660EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
1661
Avi Kivity039576c2007-03-20 12:46:50 +02001662static int pio_copy_data(struct kvm_vcpu *vcpu)
Avi Kivity46fc1472007-02-22 19:39:30 +02001663{
Avi Kivity039576c2007-03-20 12:46:50 +02001664 void *p = vcpu->pio_data;
1665 void *q;
1666 unsigned bytes;
1667 int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1;
1668
Avi Kivity039576c2007-03-20 12:46:50 +02001669 q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
1670 PAGE_KERNEL);
1671 if (!q) {
Avi Kivity039576c2007-03-20 12:46:50 +02001672 free_pio_guest_pages(vcpu);
1673 return -ENOMEM;
1674 }
1675 q += vcpu->pio.guest_page_offset;
1676 bytes = vcpu->pio.size * vcpu->pio.cur_count;
1677 if (vcpu->pio.in)
1678 memcpy(q, p, bytes);
1679 else
1680 memcpy(p, q, bytes);
1681 q -= vcpu->pio.guest_page_offset;
1682 vunmap(q);
Avi Kivity039576c2007-03-20 12:46:50 +02001683 free_pio_guest_pages(vcpu);
1684 return 0;
1685}
1686
1687static int complete_pio(struct kvm_vcpu *vcpu)
1688{
1689 struct kvm_pio_request *io = &vcpu->pio;
Avi Kivity46fc1472007-02-22 19:39:30 +02001690 long delta;
Avi Kivity039576c2007-03-20 12:46:50 +02001691 int r;
Avi Kivity46fc1472007-02-22 19:39:30 +02001692
1693 kvm_arch_ops->cache_regs(vcpu);
1694
1695 if (!io->string) {
Avi Kivity039576c2007-03-20 12:46:50 +02001696 if (io->in)
1697 memcpy(&vcpu->regs[VCPU_REGS_RAX], vcpu->pio_data,
Avi Kivity46fc1472007-02-22 19:39:30 +02001698 io->size);
1699 } else {
Avi Kivity039576c2007-03-20 12:46:50 +02001700 if (io->in) {
1701 r = pio_copy_data(vcpu);
1702 if (r) {
1703 kvm_arch_ops->cache_regs(vcpu);
1704 return r;
1705 }
1706 }
1707
Avi Kivity46fc1472007-02-22 19:39:30 +02001708 delta = 1;
1709 if (io->rep) {
Avi Kivity039576c2007-03-20 12:46:50 +02001710 delta *= io->cur_count;
Avi Kivity46fc1472007-02-22 19:39:30 +02001711 /*
1712 * The size of the register should really depend on
1713 * current address size.
1714 */
1715 vcpu->regs[VCPU_REGS_RCX] -= delta;
1716 }
Avi Kivity039576c2007-03-20 12:46:50 +02001717 if (io->down)
Avi Kivity46fc1472007-02-22 19:39:30 +02001718 delta = -delta;
1719 delta *= io->size;
Avi Kivity039576c2007-03-20 12:46:50 +02001720 if (io->in)
Avi Kivity46fc1472007-02-22 19:39:30 +02001721 vcpu->regs[VCPU_REGS_RDI] += delta;
1722 else
1723 vcpu->regs[VCPU_REGS_RSI] += delta;
1724 }
1725
Avi Kivity46fc1472007-02-22 19:39:30 +02001726 kvm_arch_ops->decache_regs(vcpu);
1727
Avi Kivity039576c2007-03-20 12:46:50 +02001728 io->count -= io->cur_count;
1729 io->cur_count = 0;
1730
1731 if (!io->count)
1732 kvm_arch_ops->skip_emulated_instruction(vcpu);
1733 return 0;
Avi Kivity46fc1472007-02-22 19:39:30 +02001734}
1735
Eddie Dong65619eb2007-07-17 11:52:33 +03001736static void kernel_pio(struct kvm_io_device *pio_dev,
1737 struct kvm_vcpu *vcpu,
1738 void *pd)
Eddie Dong74906342007-06-19 18:05:03 +03001739{
1740 /* TODO: String I/O for in kernel device */
1741
1742 if (vcpu->pio.in)
1743 kvm_iodevice_read(pio_dev, vcpu->pio.port,
1744 vcpu->pio.size,
Eddie Dong65619eb2007-07-17 11:52:33 +03001745 pd);
Eddie Dong74906342007-06-19 18:05:03 +03001746 else
1747 kvm_iodevice_write(pio_dev, vcpu->pio.port,
1748 vcpu->pio.size,
Eddie Dong65619eb2007-07-17 11:52:33 +03001749 pd);
1750}
1751
1752static void pio_string_write(struct kvm_io_device *pio_dev,
1753 struct kvm_vcpu *vcpu)
1754{
1755 struct kvm_pio_request *io = &vcpu->pio;
1756 void *pd = vcpu->pio_data;
1757 int i;
1758
1759 for (i = 0; i < io->cur_count; i++) {
1760 kvm_iodevice_write(pio_dev, io->port,
1761 io->size,
1762 pd);
1763 pd += io->size;
1764 }
Eddie Dong74906342007-06-19 18:05:03 +03001765}
1766
Avi Kivity039576c2007-03-20 12:46:50 +02001767int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
1768 int size, unsigned long count, int string, int down,
1769 gva_t address, int rep, unsigned port)
1770{
1771 unsigned now, in_page;
Eddie Dong65619eb2007-07-17 11:52:33 +03001772 int i, ret = 0;
Avi Kivity039576c2007-03-20 12:46:50 +02001773 int nr_pages = 1;
1774 struct page *page;
Eddie Dong74906342007-06-19 18:05:03 +03001775 struct kvm_io_device *pio_dev;
Avi Kivity039576c2007-03-20 12:46:50 +02001776
1777 vcpu->run->exit_reason = KVM_EXIT_IO;
1778 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
1779 vcpu->run->io.size = size;
1780 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
1781 vcpu->run->io.count = count;
1782 vcpu->run->io.port = port;
1783 vcpu->pio.count = count;
1784 vcpu->pio.cur_count = count;
1785 vcpu->pio.size = size;
1786 vcpu->pio.in = in;
Eddie Dong74906342007-06-19 18:05:03 +03001787 vcpu->pio.port = port;
Avi Kivity039576c2007-03-20 12:46:50 +02001788 vcpu->pio.string = string;
1789 vcpu->pio.down = down;
1790 vcpu->pio.guest_page_offset = offset_in_page(address);
1791 vcpu->pio.rep = rep;
1792
Eddie Dong74906342007-06-19 18:05:03 +03001793 pio_dev = vcpu_find_pio_dev(vcpu, port);
Avi Kivity039576c2007-03-20 12:46:50 +02001794 if (!string) {
1795 kvm_arch_ops->cache_regs(vcpu);
1796 memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4);
1797 kvm_arch_ops->decache_regs(vcpu);
Eddie Dong74906342007-06-19 18:05:03 +03001798 if (pio_dev) {
Eddie Dong65619eb2007-07-17 11:52:33 +03001799 kernel_pio(pio_dev, vcpu, vcpu->pio_data);
Eddie Dong74906342007-06-19 18:05:03 +03001800 complete_pio(vcpu);
1801 return 1;
1802 }
Avi Kivity039576c2007-03-20 12:46:50 +02001803 return 0;
1804 }
1805
1806 if (!count) {
1807 kvm_arch_ops->skip_emulated_instruction(vcpu);
1808 return 1;
1809 }
1810
1811 now = min(count, PAGE_SIZE / size);
1812
1813 if (!down)
1814 in_page = PAGE_SIZE - offset_in_page(address);
1815 else
1816 in_page = offset_in_page(address) + size;
1817 now = min(count, (unsigned long)in_page / size);
1818 if (!now) {
1819 /*
1820 * String I/O straddles page boundary. Pin two guest pages
1821 * so that we satisfy atomicity constraints. Do just one
1822 * transaction to avoid complexity.
1823 */
1824 nr_pages = 2;
1825 now = 1;
1826 }
1827 if (down) {
1828 /*
1829 * String I/O in reverse. Yuck. Kill the guest, fix later.
1830 */
1831 printk(KERN_ERR "kvm: guest string pio down\n");
1832 inject_gp(vcpu);
1833 return 1;
1834 }
1835 vcpu->run->io.count = now;
1836 vcpu->pio.cur_count = now;
1837
1838 for (i = 0; i < nr_pages; ++i) {
Shaohua Li11ec2802007-07-23 14:51:37 +08001839 mutex_lock(&vcpu->kvm->lock);
Avi Kivity039576c2007-03-20 12:46:50 +02001840 page = gva_to_page(vcpu, address + i * PAGE_SIZE);
1841 if (page)
1842 get_page(page);
1843 vcpu->pio.guest_pages[i] = page;
Shaohua Li11ec2802007-07-23 14:51:37 +08001844 mutex_unlock(&vcpu->kvm->lock);
Avi Kivity039576c2007-03-20 12:46:50 +02001845 if (!page) {
1846 inject_gp(vcpu);
1847 free_pio_guest_pages(vcpu);
1848 return 1;
1849 }
1850 }
1851
Eddie Dong65619eb2007-07-17 11:52:33 +03001852 if (!vcpu->pio.in) {
1853 /* string PIO write */
1854 ret = pio_copy_data(vcpu);
1855 if (ret >= 0 && pio_dev) {
1856 pio_string_write(pio_dev, vcpu);
1857 complete_pio(vcpu);
1858 if (vcpu->pio.count == 0)
1859 ret = 1;
1860 }
1861 } else if (pio_dev)
1862 printk(KERN_ERR "no string pio read support yet, "
1863 "port %x size %d count %ld\n",
1864 port, size, count);
1865
1866 return ret;
Avi Kivity039576c2007-03-20 12:46:50 +02001867}
1868EXPORT_SYMBOL_GPL(kvm_setup_pio);
1869
Avi Kivitybccf2152007-02-21 18:04:26 +02001870static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001871{
Avi Kivity6aa8b732006-12-10 02:21:36 -08001872 int r;
Avi Kivity1961d272007-03-05 19:46:05 +02001873 sigset_t sigsaved;
Avi Kivity6aa8b732006-12-10 02:21:36 -08001874
Avi Kivitybccf2152007-02-21 18:04:26 +02001875 vcpu_load(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001876
Avi Kivity1961d272007-03-05 19:46:05 +02001877 if (vcpu->sigset_active)
1878 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
1879
Dor Laor54810342007-02-12 00:54:39 -08001880 /* re-sync apic's tpr */
1881 vcpu->cr8 = kvm_run->cr8;
1882
Avi Kivity02c83202007-04-29 15:02:17 +03001883 if (vcpu->pio.cur_count) {
1884 r = complete_pio(vcpu);
1885 if (r)
1886 goto out;
1887 }
1888
1889 if (vcpu->mmio_needed) {
1890 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
1891 vcpu->mmio_read_completed = 1;
1892 vcpu->mmio_needed = 0;
1893 r = emulate_instruction(vcpu, kvm_run,
1894 vcpu->mmio_fault_cr2, 0);
1895 if (r == EMULATE_DO_MMIO) {
1896 /*
1897 * Read-modify-write. Back to userspace.
1898 */
Avi Kivity02c83202007-04-29 15:02:17 +03001899 r = 0;
1900 goto out;
Avi Kivity46fc1472007-02-22 19:39:30 +02001901 }
Avi Kivity6aa8b732006-12-10 02:21:36 -08001902 }
1903
Avi Kivity8eb7d332007-03-04 14:17:08 +02001904 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) {
Avi Kivityb4e63f52007-03-04 13:59:30 +02001905 kvm_arch_ops->cache_regs(vcpu);
1906 vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret;
1907 kvm_arch_ops->decache_regs(vcpu);
1908 }
1909
Avi Kivity6aa8b732006-12-10 02:21:36 -08001910 r = kvm_arch_ops->run(vcpu, kvm_run);
1911
Avi Kivity039576c2007-03-20 12:46:50 +02001912out:
Avi Kivity1961d272007-03-05 19:46:05 +02001913 if (vcpu->sigset_active)
1914 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1915
Avi Kivity6aa8b732006-12-10 02:21:36 -08001916 vcpu_put(vcpu);
1917 return r;
1918}
1919
Avi Kivitybccf2152007-02-21 18:04:26 +02001920static int kvm_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu,
1921 struct kvm_regs *regs)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001922{
Avi Kivitybccf2152007-02-21 18:04:26 +02001923 vcpu_load(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001924
1925 kvm_arch_ops->cache_regs(vcpu);
1926
1927 regs->rax = vcpu->regs[VCPU_REGS_RAX];
1928 regs->rbx = vcpu->regs[VCPU_REGS_RBX];
1929 regs->rcx = vcpu->regs[VCPU_REGS_RCX];
1930 regs->rdx = vcpu->regs[VCPU_REGS_RDX];
1931 regs->rsi = vcpu->regs[VCPU_REGS_RSI];
1932 regs->rdi = vcpu->regs[VCPU_REGS_RDI];
1933 regs->rsp = vcpu->regs[VCPU_REGS_RSP];
1934 regs->rbp = vcpu->regs[VCPU_REGS_RBP];
Avi Kivity05b3e0c2006-12-13 00:33:45 -08001935#ifdef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08001936 regs->r8 = vcpu->regs[VCPU_REGS_R8];
1937 regs->r9 = vcpu->regs[VCPU_REGS_R9];
1938 regs->r10 = vcpu->regs[VCPU_REGS_R10];
1939 regs->r11 = vcpu->regs[VCPU_REGS_R11];
1940 regs->r12 = vcpu->regs[VCPU_REGS_R12];
1941 regs->r13 = vcpu->regs[VCPU_REGS_R13];
1942 regs->r14 = vcpu->regs[VCPU_REGS_R14];
1943 regs->r15 = vcpu->regs[VCPU_REGS_R15];
1944#endif
1945
1946 regs->rip = vcpu->rip;
1947 regs->rflags = kvm_arch_ops->get_rflags(vcpu);
1948
1949 /*
1950 * Don't leak debug flags in case they were set for guest debugging
1951 */
1952 if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
1953 regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
1954
1955 vcpu_put(vcpu);
1956
1957 return 0;
1958}
1959
Avi Kivitybccf2152007-02-21 18:04:26 +02001960static int kvm_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu,
1961 struct kvm_regs *regs)
Avi Kivity6aa8b732006-12-10 02:21:36 -08001962{
Avi Kivitybccf2152007-02-21 18:04:26 +02001963 vcpu_load(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08001964
1965 vcpu->regs[VCPU_REGS_RAX] = regs->rax;
1966 vcpu->regs[VCPU_REGS_RBX] = regs->rbx;
1967 vcpu->regs[VCPU_REGS_RCX] = regs->rcx;
1968 vcpu->regs[VCPU_REGS_RDX] = regs->rdx;
1969 vcpu->regs[VCPU_REGS_RSI] = regs->rsi;
1970 vcpu->regs[VCPU_REGS_RDI] = regs->rdi;
1971 vcpu->regs[VCPU_REGS_RSP] = regs->rsp;
1972 vcpu->regs[VCPU_REGS_RBP] = regs->rbp;
Avi Kivity05b3e0c2006-12-13 00:33:45 -08001973#ifdef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08001974 vcpu->regs[VCPU_REGS_R8] = regs->r8;
1975 vcpu->regs[VCPU_REGS_R9] = regs->r9;
1976 vcpu->regs[VCPU_REGS_R10] = regs->r10;
1977 vcpu->regs[VCPU_REGS_R11] = regs->r11;
1978 vcpu->regs[VCPU_REGS_R12] = regs->r12;
1979 vcpu->regs[VCPU_REGS_R13] = regs->r13;
1980 vcpu->regs[VCPU_REGS_R14] = regs->r14;
1981 vcpu->regs[VCPU_REGS_R15] = regs->r15;
1982#endif
1983
1984 vcpu->rip = regs->rip;
1985 kvm_arch_ops->set_rflags(vcpu, regs->rflags);
1986
1987 kvm_arch_ops->decache_regs(vcpu);
1988
1989 vcpu_put(vcpu);
1990
1991 return 0;
1992}
1993
1994static void get_segment(struct kvm_vcpu *vcpu,
1995 struct kvm_segment *var, int seg)
1996{
1997 return kvm_arch_ops->get_segment(vcpu, var, seg);
1998}
1999
Avi Kivitybccf2152007-02-21 18:04:26 +02002000static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2001 struct kvm_sregs *sregs)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002002{
Avi Kivity6aa8b732006-12-10 02:21:36 -08002003 struct descriptor_table dt;
2004
Avi Kivitybccf2152007-02-21 18:04:26 +02002005 vcpu_load(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002006
2007 get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
2008 get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
2009 get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
2010 get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
2011 get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
2012 get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
2013
2014 get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
2015 get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
2016
2017 kvm_arch_ops->get_idt(vcpu, &dt);
2018 sregs->idt.limit = dt.limit;
2019 sregs->idt.base = dt.base;
2020 kvm_arch_ops->get_gdt(vcpu, &dt);
2021 sregs->gdt.limit = dt.limit;
2022 sregs->gdt.base = dt.base;
2023
Anthony Liguori25c4c272007-04-27 09:29:21 +03002024 kvm_arch_ops->decache_cr4_guest_bits(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002025 sregs->cr0 = vcpu->cr0;
2026 sregs->cr2 = vcpu->cr2;
2027 sregs->cr3 = vcpu->cr3;
2028 sregs->cr4 = vcpu->cr4;
2029 sregs->cr8 = vcpu->cr8;
2030 sregs->efer = vcpu->shadow_efer;
2031 sregs->apic_base = vcpu->apic_base;
2032
2033 memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
2034 sizeof sregs->interrupt_bitmap);
2035
2036 vcpu_put(vcpu);
2037
2038 return 0;
2039}
2040
2041static void set_segment(struct kvm_vcpu *vcpu,
2042 struct kvm_segment *var, int seg)
2043{
2044 return kvm_arch_ops->set_segment(vcpu, var, seg);
2045}
2046
Avi Kivitybccf2152007-02-21 18:04:26 +02002047static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2048 struct kvm_sregs *sregs)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002049{
Avi Kivity6aa8b732006-12-10 02:21:36 -08002050 int mmu_reset_needed = 0;
2051 int i;
2052 struct descriptor_table dt;
2053
Avi Kivitybccf2152007-02-21 18:04:26 +02002054 vcpu_load(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002055
Avi Kivity6aa8b732006-12-10 02:21:36 -08002056 dt.limit = sregs->idt.limit;
2057 dt.base = sregs->idt.base;
2058 kvm_arch_ops->set_idt(vcpu, &dt);
2059 dt.limit = sregs->gdt.limit;
2060 dt.base = sregs->gdt.base;
2061 kvm_arch_ops->set_gdt(vcpu, &dt);
2062
2063 vcpu->cr2 = sregs->cr2;
2064 mmu_reset_needed |= vcpu->cr3 != sregs->cr3;
2065 vcpu->cr3 = sregs->cr3;
2066
2067 vcpu->cr8 = sregs->cr8;
2068
2069 mmu_reset_needed |= vcpu->shadow_efer != sregs->efer;
Avi Kivity05b3e0c2006-12-13 00:33:45 -08002070#ifdef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08002071 kvm_arch_ops->set_efer(vcpu, sregs->efer);
2072#endif
2073 vcpu->apic_base = sregs->apic_base;
2074
Anthony Liguori25c4c272007-04-27 09:29:21 +03002075 kvm_arch_ops->decache_cr4_guest_bits(vcpu);
Avi Kivity399badf2007-01-05 16:36:38 -08002076
Avi Kivity6aa8b732006-12-10 02:21:36 -08002077 mmu_reset_needed |= vcpu->cr0 != sregs->cr0;
Avi Kivityf6528b02007-03-20 18:44:51 +02002078 kvm_arch_ops->set_cr0(vcpu, sregs->cr0);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002079
2080 mmu_reset_needed |= vcpu->cr4 != sregs->cr4;
2081 kvm_arch_ops->set_cr4(vcpu, sregs->cr4);
Avi Kivity1b0973b2007-01-05 16:36:41 -08002082 if (!is_long_mode(vcpu) && is_pae(vcpu))
2083 load_pdptrs(vcpu, vcpu->cr3);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002084
2085 if (mmu_reset_needed)
2086 kvm_mmu_reset_context(vcpu);
2087
2088 memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
2089 sizeof vcpu->irq_pending);
2090 vcpu->irq_summary = 0;
Rusty Russell9eb829c2007-07-18 13:05:58 +10002091 for (i = 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002092 if (vcpu->irq_pending[i])
2093 __set_bit(i, &vcpu->irq_summary);
2094
Avi Kivity024aa1c2007-03-21 13:44:58 +02002095 set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
2096 set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
2097 set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
2098 set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
2099 set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
2100 set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
2101
2102 set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
2103 set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
2104
Avi Kivity6aa8b732006-12-10 02:21:36 -08002105 vcpu_put(vcpu);
2106
2107 return 0;
2108}
2109
2110/*
2111 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
2112 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
Michael Riepebf591b22006-12-22 01:05:36 -08002113 *
2114 * This list is modified at module load time to reflect the
2115 * capabilities of the host cpu.
Avi Kivity6aa8b732006-12-10 02:21:36 -08002116 */
2117static u32 msrs_to_save[] = {
2118 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
2119 MSR_K6_STAR,
Avi Kivity05b3e0c2006-12-13 00:33:45 -08002120#ifdef CONFIG_X86_64
Avi Kivity6aa8b732006-12-10 02:21:36 -08002121 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
2122#endif
2123 MSR_IA32_TIME_STAMP_COUNTER,
2124};
2125
/* Count of valid leading entries in msrs_to_save[]; set by kvm_init_msr_list(). */
static unsigned int num_msrs_to_save;
2127
Avi Kivity6f00e682007-01-26 00:56:40 -08002128static u32 emulated_msrs[] = {
2129 MSR_IA32_MISC_ENABLE,
2130};
2131
Michael Riepebf591b22006-12-22 01:05:36 -08002132static __init void kvm_init_msr_list(void)
2133{
2134 u32 dummy[2];
2135 unsigned i, j;
2136
2137 for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
2138 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
2139 continue;
2140 if (j < i)
2141 msrs_to_save[j] = msrs_to_save[i];
2142 j++;
2143 }
2144 num_msrs_to_save = j;
2145}
Avi Kivity6aa8b732006-12-10 02:21:36 -08002146
2147/*
2148 * Adapt set_msr() to msr_io()'s calling convention
2149 */
2150static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
2151{
Avi Kivity35f3f282007-07-17 14:20:30 +03002152 return kvm_set_msr(vcpu, index, *data);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002153}
2154
2155/*
2156 * Read or write a bunch of msrs. All parameters are kernel addresses.
2157 *
2158 * @return number of msrs set successfully.
2159 */
Avi Kivitybccf2152007-02-21 18:04:26 +02002160static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
Avi Kivity6aa8b732006-12-10 02:21:36 -08002161 struct kvm_msr_entry *entries,
2162 int (*do_msr)(struct kvm_vcpu *vcpu,
2163 unsigned index, u64 *data))
2164{
Avi Kivity6aa8b732006-12-10 02:21:36 -08002165 int i;
2166
Avi Kivitybccf2152007-02-21 18:04:26 +02002167 vcpu_load(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002168
2169 for (i = 0; i < msrs->nmsrs; ++i)
2170 if (do_msr(vcpu, entries[i].index, &entries[i].data))
2171 break;
2172
2173 vcpu_put(vcpu);
2174
2175 return i;
2176}
2177
2178/*
2179 * Read or write a bunch of msrs. Parameters are user addresses.
2180 *
2181 * @return number of msrs set successfully.
2182 */
Avi Kivitybccf2152007-02-21 18:04:26 +02002183static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
Avi Kivity6aa8b732006-12-10 02:21:36 -08002184 int (*do_msr)(struct kvm_vcpu *vcpu,
2185 unsigned index, u64 *data),
2186 int writeback)
2187{
2188 struct kvm_msrs msrs;
2189 struct kvm_msr_entry *entries;
2190 int r, n;
2191 unsigned size;
2192
2193 r = -EFAULT;
2194 if (copy_from_user(&msrs, user_msrs, sizeof msrs))
2195 goto out;
2196
2197 r = -E2BIG;
2198 if (msrs.nmsrs >= MAX_IO_MSRS)
2199 goto out;
2200
2201 r = -ENOMEM;
2202 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
2203 entries = vmalloc(size);
2204 if (!entries)
2205 goto out;
2206
2207 r = -EFAULT;
2208 if (copy_from_user(entries, user_msrs->entries, size))
2209 goto out_free;
2210
Avi Kivitybccf2152007-02-21 18:04:26 +02002211 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002212 if (r < 0)
2213 goto out_free;
2214
2215 r = -EFAULT;
2216 if (writeback && copy_to_user(user_msrs->entries, entries, size))
2217 goto out_free;
2218
2219 r = n;
2220
2221out_free:
2222 vfree(entries);
2223out:
2224 return r;
2225}
2226
2227/*
2228 * Translate a guest virtual address to a guest physical address.
2229 */
Avi Kivitybccf2152007-02-21 18:04:26 +02002230static int kvm_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2231 struct kvm_translation *tr)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002232{
2233 unsigned long vaddr = tr->linear_address;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002234 gpa_t gpa;
2235
Avi Kivitybccf2152007-02-21 18:04:26 +02002236 vcpu_load(vcpu);
Shaohua Li11ec2802007-07-23 14:51:37 +08002237 mutex_lock(&vcpu->kvm->lock);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002238 gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr);
2239 tr->physical_address = gpa;
2240 tr->valid = gpa != UNMAPPED_GVA;
2241 tr->writeable = 1;
2242 tr->usermode = 0;
Shaohua Li11ec2802007-07-23 14:51:37 +08002243 mutex_unlock(&vcpu->kvm->lock);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002244 vcpu_put(vcpu);
2245
2246 return 0;
2247}
2248
Avi Kivitybccf2152007-02-21 18:04:26 +02002249static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
2250 struct kvm_interrupt *irq)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002251{
Avi Kivity6aa8b732006-12-10 02:21:36 -08002252 if (irq->irq < 0 || irq->irq >= 256)
2253 return -EINVAL;
Avi Kivitybccf2152007-02-21 18:04:26 +02002254 vcpu_load(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002255
2256 set_bit(irq->irq, vcpu->irq_pending);
2257 set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary);
2258
2259 vcpu_put(vcpu);
2260
2261 return 0;
2262}
2263
Avi Kivitybccf2152007-02-21 18:04:26 +02002264static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
2265 struct kvm_debug_guest *dbg)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002266{
Avi Kivity6aa8b732006-12-10 02:21:36 -08002267 int r;
2268
Avi Kivitybccf2152007-02-21 18:04:26 +02002269 vcpu_load(vcpu);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002270
2271 r = kvm_arch_ops->set_guest_debug(vcpu, dbg);
2272
2273 vcpu_put(vcpu);
2274
2275 return r;
2276}
2277
Avi Kivity9a2bb7f2007-02-22 12:58:31 +02002278static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma,
2279 unsigned long address,
2280 int *type)
2281{
2282 struct kvm_vcpu *vcpu = vma->vm_file->private_data;
2283 unsigned long pgoff;
2284 struct page *page;
2285
Avi Kivity9a2bb7f2007-02-22 12:58:31 +02002286 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
Avi Kivity039576c2007-03-20 12:46:50 +02002287 if (pgoff == 0)
2288 page = virt_to_page(vcpu->run);
2289 else if (pgoff == KVM_PIO_PAGE_OFFSET)
2290 page = virt_to_page(vcpu->pio_data);
2291 else
Avi Kivity9a2bb7f2007-02-22 12:58:31 +02002292 return NOPAGE_SIGBUS;
Avi Kivity9a2bb7f2007-02-22 12:58:31 +02002293 get_page(page);
Nguyen Anh Quynhcd0d9132007-07-11 14:30:54 +03002294 if (type != NULL)
2295 *type = VM_FAULT_MINOR;
2296
Avi Kivity9a2bb7f2007-02-22 12:58:31 +02002297 return page;
2298}
2299
2300static struct vm_operations_struct kvm_vcpu_vm_ops = {
2301 .nopage = kvm_vcpu_nopage,
2302};
2303
2304static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
2305{
2306 vma->vm_ops = &kvm_vcpu_vm_ops;
2307 return 0;
2308}
2309
Avi Kivitybccf2152007-02-21 18:04:26 +02002310static int kvm_vcpu_release(struct inode *inode, struct file *filp)
2311{
2312 struct kvm_vcpu *vcpu = filp->private_data;
2313
2314 fput(vcpu->kvm->filp);
2315 return 0;
2316}
2317
2318static struct file_operations kvm_vcpu_fops = {
2319 .release = kvm_vcpu_release,
2320 .unlocked_ioctl = kvm_vcpu_ioctl,
2321 .compat_ioctl = kvm_vcpu_ioctl,
Avi Kivity9a2bb7f2007-02-22 12:58:31 +02002322 .mmap = kvm_vcpu_mmap,
Avi Kivitybccf2152007-02-21 18:04:26 +02002323};
2324
2325/*
2326 * Allocates an inode for the vcpu.
2327 */
2328static int create_vcpu_fd(struct kvm_vcpu *vcpu)
2329{
2330 int fd, r;
2331 struct inode *inode;
2332 struct file *file;
2333
Avi Kivityd6d28162007-06-28 08:38:16 -04002334 r = anon_inode_getfd(&fd, &inode, &file,
2335 "kvm-vcpu", &kvm_vcpu_fops, vcpu);
2336 if (r)
2337 return r;
Avi Kivitybccf2152007-02-21 18:04:26 +02002338 atomic_inc(&vcpu->kvm->filp->f_count);
Avi Kivitybccf2152007-02-21 18:04:26 +02002339 return fd;
Avi Kivitybccf2152007-02-21 18:04:26 +02002340}
2341
Avi Kivityc5ea7662007-02-20 18:41:05 +02002342/*
2343 * Creates some virtual cpus. Good luck creating more than one.
2344 */
2345static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
2346{
2347 int r;
2348 struct kvm_vcpu *vcpu;
2349
Avi Kivityc5ea7662007-02-20 18:41:05 +02002350 if (!valid_vcpu(n))
Rusty Russellfb3f0f52007-07-27 17:16:56 +10002351 return -EINVAL;
Avi Kivityc5ea7662007-02-20 18:41:05 +02002352
Rusty Russellfb3f0f52007-07-27 17:16:56 +10002353 vcpu = kvm_arch_ops->vcpu_create(kvm, n);
2354 if (IS_ERR(vcpu))
2355 return PTR_ERR(vcpu);
Avi Kivityc5ea7662007-02-20 18:41:05 +02002356
Avi Kivity15ad7142007-07-11 18:17:21 +03002357 preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
2358
Rusty Russellfb3f0f52007-07-27 17:16:56 +10002359 vcpu_load(vcpu);
Avi Kivityc5ea7662007-02-20 18:41:05 +02002360 r = kvm_mmu_setup(vcpu);
Avi Kivityc5ea7662007-02-20 18:41:05 +02002361 vcpu_put(vcpu);
Avi Kivityc5ea7662007-02-20 18:41:05 +02002362 if (r < 0)
Rusty Russellfb3f0f52007-07-27 17:16:56 +10002363 goto free_vcpu;
Avi Kivityc5ea7662007-02-20 18:41:05 +02002364
Shaohua Li11ec2802007-07-23 14:51:37 +08002365 mutex_lock(&kvm->lock);
Rusty Russellfb3f0f52007-07-27 17:16:56 +10002366 if (kvm->vcpus[n]) {
2367 r = -EEXIST;
Shaohua Li11ec2802007-07-23 14:51:37 +08002368 mutex_unlock(&kvm->lock);
Rusty Russellfb3f0f52007-07-27 17:16:56 +10002369 goto mmu_unload;
2370 }
2371 kvm->vcpus[n] = vcpu;
Shaohua Li11ec2802007-07-23 14:51:37 +08002372 mutex_unlock(&kvm->lock);
Rusty Russellfb3f0f52007-07-27 17:16:56 +10002373
2374 /* Now it's all set up, let userspace reach it */
Avi Kivitybccf2152007-02-21 18:04:26 +02002375 r = create_vcpu_fd(vcpu);
2376 if (r < 0)
Rusty Russellfb3f0f52007-07-27 17:16:56 +10002377 goto unlink;
Avi Kivitybccf2152007-02-21 18:04:26 +02002378 return r;
Avi Kivityc5ea7662007-02-20 18:41:05 +02002379
Rusty Russellfb3f0f52007-07-27 17:16:56 +10002380unlink:
Shaohua Li11ec2802007-07-23 14:51:37 +08002381 mutex_lock(&kvm->lock);
Rusty Russellfb3f0f52007-07-27 17:16:56 +10002382 kvm->vcpus[n] = NULL;
Shaohua Li11ec2802007-07-23 14:51:37 +08002383 mutex_unlock(&kvm->lock);
Rusty Russellfb3f0f52007-07-27 17:16:56 +10002384
2385mmu_unload:
2386 vcpu_load(vcpu);
2387 kvm_mmu_unload(vcpu);
2388 vcpu_put(vcpu);
2389
2390free_vcpu:
2391 kvm_arch_ops->vcpu_free(vcpu);
Avi Kivityc5ea7662007-02-20 18:41:05 +02002392 return r;
2393}
2394
Eddie Dong2cc51562007-05-21 07:28:09 +03002395static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
2396{
2397 u64 efer;
2398 int i;
2399 struct kvm_cpuid_entry *e, *entry;
2400
2401 rdmsrl(MSR_EFER, efer);
2402 entry = NULL;
2403 for (i = 0; i < vcpu->cpuid_nent; ++i) {
2404 e = &vcpu->cpuid_entries[i];
2405 if (e->function == 0x80000001) {
2406 entry = e;
2407 break;
2408 }
2409 }
Avi Kivity4c981b42007-07-25 09:22:12 +03002410 if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) {
Eddie Dong2cc51562007-05-21 07:28:09 +03002411 entry->edx &= ~(1 << 20);
Avi Kivity4c981b42007-07-25 09:22:12 +03002412 printk(KERN_INFO "kvm: guest NX capability removed\n");
Eddie Dong2cc51562007-05-21 07:28:09 +03002413 }
2414}
2415
Avi Kivity06465c52007-02-28 20:46:53 +02002416static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
2417 struct kvm_cpuid *cpuid,
2418 struct kvm_cpuid_entry __user *entries)
2419{
2420 int r;
2421
2422 r = -E2BIG;
2423 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
2424 goto out;
2425 r = -EFAULT;
2426 if (copy_from_user(&vcpu->cpuid_entries, entries,
2427 cpuid->nent * sizeof(struct kvm_cpuid_entry)))
2428 goto out;
2429 vcpu->cpuid_nent = cpuid->nent;
Eddie Dong2cc51562007-05-21 07:28:09 +03002430 cpuid_fix_nx_cap(vcpu);
Avi Kivity06465c52007-02-28 20:46:53 +02002431 return 0;
2432
2433out:
2434 return r;
2435}
2436
Avi Kivity1961d272007-03-05 19:46:05 +02002437static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
2438{
2439 if (sigset) {
2440 sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP));
2441 vcpu->sigset_active = 1;
2442 vcpu->sigset = *sigset;
2443 } else
2444 vcpu->sigset_active = 0;
2445 return 0;
2446}
2447
Avi Kivityb8836732007-04-01 16:34:31 +03002448/*
2449 * fxsave fpu state. Taken from x86_64/processor.h. To be killed when
2450 * we have asm/x86/processor.h
2451 */
2452struct fxsave {
2453 u16 cwd;
2454 u16 swd;
2455 u16 twd;
2456 u16 fop;
2457 u64 rip;
2458 u64 rdp;
2459 u32 mxcsr;
2460 u32 mxcsr_mask;
2461 u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
2462#ifdef CONFIG_X86_64
2463 u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */
2464#else
2465 u32 xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */
2466#endif
2467};
2468
2469static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2470{
2471 struct fxsave *fxsave = (struct fxsave *)vcpu->guest_fx_image;
2472
2473 vcpu_load(vcpu);
2474
2475 memcpy(fpu->fpr, fxsave->st_space, 128);
2476 fpu->fcw = fxsave->cwd;
2477 fpu->fsw = fxsave->swd;
2478 fpu->ftwx = fxsave->twd;
2479 fpu->last_opcode = fxsave->fop;
2480 fpu->last_ip = fxsave->rip;
2481 fpu->last_dp = fxsave->rdp;
2482 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
2483
2484 vcpu_put(vcpu);
2485
2486 return 0;
2487}
2488
2489static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2490{
2491 struct fxsave *fxsave = (struct fxsave *)vcpu->guest_fx_image;
2492
2493 vcpu_load(vcpu);
2494
2495 memcpy(fxsave->st_space, fpu->fpr, 128);
2496 fxsave->cwd = fpu->fcw;
2497 fxsave->swd = fpu->fsw;
2498 fxsave->twd = fpu->ftwx;
2499 fxsave->fop = fpu->last_opcode;
2500 fxsave->rip = fpu->last_ip;
2501 fxsave->rdp = fpu->last_dp;
2502 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
2503
2504 vcpu_put(vcpu);
2505
2506 return 0;
2507}
2508
Avi Kivitybccf2152007-02-21 18:04:26 +02002509static long kvm_vcpu_ioctl(struct file *filp,
2510 unsigned int ioctl, unsigned long arg)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002511{
Avi Kivitybccf2152007-02-21 18:04:26 +02002512 struct kvm_vcpu *vcpu = filp->private_data;
Al Viro2f366982007-02-09 16:38:35 +00002513 void __user *argp = (void __user *)arg;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002514 int r = -EINVAL;
2515
2516 switch (ioctl) {
Avi Kivity9a2bb7f2007-02-22 12:58:31 +02002517 case KVM_RUN:
Avi Kivityf0fe5102007-03-07 13:11:17 +02002518 r = -EINVAL;
2519 if (arg)
2520 goto out;
Avi Kivity9a2bb7f2007-02-22 12:58:31 +02002521 r = kvm_vcpu_ioctl_run(vcpu, vcpu->run);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002522 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002523 case KVM_GET_REGS: {
2524 struct kvm_regs kvm_regs;
2525
Avi Kivitybccf2152007-02-21 18:04:26 +02002526 memset(&kvm_regs, 0, sizeof kvm_regs);
2527 r = kvm_vcpu_ioctl_get_regs(vcpu, &kvm_regs);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002528 if (r)
2529 goto out;
2530 r = -EFAULT;
Al Viro2f366982007-02-09 16:38:35 +00002531 if (copy_to_user(argp, &kvm_regs, sizeof kvm_regs))
Avi Kivity6aa8b732006-12-10 02:21:36 -08002532 goto out;
2533 r = 0;
2534 break;
2535 }
2536 case KVM_SET_REGS: {
2537 struct kvm_regs kvm_regs;
2538
2539 r = -EFAULT;
Al Viro2f366982007-02-09 16:38:35 +00002540 if (copy_from_user(&kvm_regs, argp, sizeof kvm_regs))
Avi Kivity6aa8b732006-12-10 02:21:36 -08002541 goto out;
Avi Kivitybccf2152007-02-21 18:04:26 +02002542 r = kvm_vcpu_ioctl_set_regs(vcpu, &kvm_regs);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002543 if (r)
2544 goto out;
2545 r = 0;
2546 break;
2547 }
2548 case KVM_GET_SREGS: {
2549 struct kvm_sregs kvm_sregs;
2550
Avi Kivitybccf2152007-02-21 18:04:26 +02002551 memset(&kvm_sregs, 0, sizeof kvm_sregs);
2552 r = kvm_vcpu_ioctl_get_sregs(vcpu, &kvm_sregs);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002553 if (r)
2554 goto out;
2555 r = -EFAULT;
Al Viro2f366982007-02-09 16:38:35 +00002556 if (copy_to_user(argp, &kvm_sregs, sizeof kvm_sregs))
Avi Kivity6aa8b732006-12-10 02:21:36 -08002557 goto out;
2558 r = 0;
2559 break;
2560 }
2561 case KVM_SET_SREGS: {
2562 struct kvm_sregs kvm_sregs;
2563
2564 r = -EFAULT;
Al Viro2f366982007-02-09 16:38:35 +00002565 if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs))
Avi Kivity6aa8b732006-12-10 02:21:36 -08002566 goto out;
Avi Kivitybccf2152007-02-21 18:04:26 +02002567 r = kvm_vcpu_ioctl_set_sregs(vcpu, &kvm_sregs);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002568 if (r)
2569 goto out;
2570 r = 0;
2571 break;
2572 }
2573 case KVM_TRANSLATE: {
2574 struct kvm_translation tr;
2575
2576 r = -EFAULT;
Al Viro2f366982007-02-09 16:38:35 +00002577 if (copy_from_user(&tr, argp, sizeof tr))
Avi Kivity6aa8b732006-12-10 02:21:36 -08002578 goto out;
Avi Kivitybccf2152007-02-21 18:04:26 +02002579 r = kvm_vcpu_ioctl_translate(vcpu, &tr);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002580 if (r)
2581 goto out;
2582 r = -EFAULT;
Al Viro2f366982007-02-09 16:38:35 +00002583 if (copy_to_user(argp, &tr, sizeof tr))
Avi Kivity6aa8b732006-12-10 02:21:36 -08002584 goto out;
2585 r = 0;
2586 break;
2587 }
2588 case KVM_INTERRUPT: {
2589 struct kvm_interrupt irq;
2590
2591 r = -EFAULT;
Al Viro2f366982007-02-09 16:38:35 +00002592 if (copy_from_user(&irq, argp, sizeof irq))
Avi Kivity6aa8b732006-12-10 02:21:36 -08002593 goto out;
Avi Kivitybccf2152007-02-21 18:04:26 +02002594 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002595 if (r)
2596 goto out;
2597 r = 0;
2598 break;
2599 }
2600 case KVM_DEBUG_GUEST: {
2601 struct kvm_debug_guest dbg;
2602
2603 r = -EFAULT;
Al Viro2f366982007-02-09 16:38:35 +00002604 if (copy_from_user(&dbg, argp, sizeof dbg))
Avi Kivity6aa8b732006-12-10 02:21:36 -08002605 goto out;
Avi Kivitybccf2152007-02-21 18:04:26 +02002606 r = kvm_vcpu_ioctl_debug_guest(vcpu, &dbg);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002607 if (r)
2608 goto out;
2609 r = 0;
2610 break;
2611 }
Avi Kivitybccf2152007-02-21 18:04:26 +02002612 case KVM_GET_MSRS:
Avi Kivity35f3f282007-07-17 14:20:30 +03002613 r = msr_io(vcpu, argp, kvm_get_msr, 1);
Avi Kivitybccf2152007-02-21 18:04:26 +02002614 break;
2615 case KVM_SET_MSRS:
2616 r = msr_io(vcpu, argp, do_set_msr, 0);
2617 break;
Avi Kivity06465c52007-02-28 20:46:53 +02002618 case KVM_SET_CPUID: {
2619 struct kvm_cpuid __user *cpuid_arg = argp;
2620 struct kvm_cpuid cpuid;
2621
2622 r = -EFAULT;
2623 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2624 goto out;
2625 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
2626 if (r)
2627 goto out;
2628 break;
2629 }
Avi Kivity1961d272007-03-05 19:46:05 +02002630 case KVM_SET_SIGNAL_MASK: {
2631 struct kvm_signal_mask __user *sigmask_arg = argp;
2632 struct kvm_signal_mask kvm_sigmask;
2633 sigset_t sigset, *p;
2634
2635 p = NULL;
2636 if (argp) {
2637 r = -EFAULT;
2638 if (copy_from_user(&kvm_sigmask, argp,
2639 sizeof kvm_sigmask))
2640 goto out;
2641 r = -EINVAL;
2642 if (kvm_sigmask.len != sizeof sigset)
2643 goto out;
2644 r = -EFAULT;
2645 if (copy_from_user(&sigset, sigmask_arg->sigset,
2646 sizeof sigset))
2647 goto out;
2648 p = &sigset;
2649 }
2650 r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
2651 break;
2652 }
Avi Kivityb8836732007-04-01 16:34:31 +03002653 case KVM_GET_FPU: {
2654 struct kvm_fpu fpu;
2655
2656 memset(&fpu, 0, sizeof fpu);
2657 r = kvm_vcpu_ioctl_get_fpu(vcpu, &fpu);
2658 if (r)
2659 goto out;
2660 r = -EFAULT;
2661 if (copy_to_user(argp, &fpu, sizeof fpu))
2662 goto out;
2663 r = 0;
2664 break;
2665 }
2666 case KVM_SET_FPU: {
2667 struct kvm_fpu fpu;
2668
2669 r = -EFAULT;
2670 if (copy_from_user(&fpu, argp, sizeof fpu))
2671 goto out;
2672 r = kvm_vcpu_ioctl_set_fpu(vcpu, &fpu);
2673 if (r)
2674 goto out;
2675 r = 0;
2676 break;
2677 }
Avi Kivitybccf2152007-02-21 18:04:26 +02002678 default:
2679 ;
2680 }
2681out:
2682 return r;
2683}
2684
2685static long kvm_vm_ioctl(struct file *filp,
2686 unsigned int ioctl, unsigned long arg)
2687{
2688 struct kvm *kvm = filp->private_data;
2689 void __user *argp = (void __user *)arg;
2690 int r = -EINVAL;
2691
2692 switch (ioctl) {
2693 case KVM_CREATE_VCPU:
2694 r = kvm_vm_ioctl_create_vcpu(kvm, arg);
2695 if (r < 0)
2696 goto out;
2697 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002698 case KVM_SET_MEMORY_REGION: {
2699 struct kvm_memory_region kvm_mem;
2700
2701 r = -EFAULT;
Al Viro2f366982007-02-09 16:38:35 +00002702 if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
Avi Kivity6aa8b732006-12-10 02:21:36 -08002703 goto out;
Avi Kivity2c6f5df2007-02-20 18:27:58 +02002704 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_mem);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002705 if (r)
2706 goto out;
2707 break;
2708 }
2709 case KVM_GET_DIRTY_LOG: {
2710 struct kvm_dirty_log log;
2711
2712 r = -EFAULT;
Al Viro2f366982007-02-09 16:38:35 +00002713 if (copy_from_user(&log, argp, sizeof log))
Avi Kivity6aa8b732006-12-10 02:21:36 -08002714 goto out;
Avi Kivity2c6f5df2007-02-20 18:27:58 +02002715 r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002716 if (r)
2717 goto out;
2718 break;
2719 }
Avi Kivitye8207542007-03-30 16:54:30 +03002720 case KVM_SET_MEMORY_ALIAS: {
2721 struct kvm_memory_alias alias;
2722
2723 r = -EFAULT;
2724 if (copy_from_user(&alias, argp, sizeof alias))
2725 goto out;
2726 r = kvm_vm_ioctl_set_memory_alias(kvm, &alias);
2727 if (r)
2728 goto out;
2729 break;
2730 }
Avi Kivityf17abe92007-02-21 19:28:04 +02002731 default:
2732 ;
2733 }
2734out:
2735 return r;
2736}
2737
2738static struct page *kvm_vm_nopage(struct vm_area_struct *vma,
2739 unsigned long address,
2740 int *type)
2741{
2742 struct kvm *kvm = vma->vm_file->private_data;
2743 unsigned long pgoff;
Avi Kivityf17abe92007-02-21 19:28:04 +02002744 struct page *page;
2745
Avi Kivityf17abe92007-02-21 19:28:04 +02002746 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
Avi Kivity954bbbc22007-03-30 14:02:32 +03002747 page = gfn_to_page(kvm, pgoff);
Avi Kivityf17abe92007-02-21 19:28:04 +02002748 if (!page)
2749 return NOPAGE_SIGBUS;
2750 get_page(page);
Nguyen Anh Quynhcd0d9132007-07-11 14:30:54 +03002751 if (type != NULL)
2752 *type = VM_FAULT_MINOR;
2753
Avi Kivityf17abe92007-02-21 19:28:04 +02002754 return page;
2755}
2756
2757static struct vm_operations_struct kvm_vm_vm_ops = {
2758 .nopage = kvm_vm_nopage,
2759};
2760
2761static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
2762{
2763 vma->vm_ops = &kvm_vm_vm_ops;
2764 return 0;
2765}
2766
2767static struct file_operations kvm_vm_fops = {
2768 .release = kvm_vm_release,
2769 .unlocked_ioctl = kvm_vm_ioctl,
2770 .compat_ioctl = kvm_vm_ioctl,
2771 .mmap = kvm_vm_mmap,
2772};
2773
2774static int kvm_dev_ioctl_create_vm(void)
2775{
2776 int fd, r;
2777 struct inode *inode;
2778 struct file *file;
2779 struct kvm *kvm;
2780
Avi Kivityf17abe92007-02-21 19:28:04 +02002781 kvm = kvm_create_vm();
Avi Kivityd6d28162007-06-28 08:38:16 -04002782 if (IS_ERR(kvm))
2783 return PTR_ERR(kvm);
2784 r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm);
2785 if (r) {
2786 kvm_destroy_vm(kvm);
2787 return r;
Avi Kivityf17abe92007-02-21 19:28:04 +02002788 }
2789
Avi Kivitybccf2152007-02-21 18:04:26 +02002790 kvm->filp = file;
Avi Kivityf17abe92007-02-21 19:28:04 +02002791
Avi Kivityf17abe92007-02-21 19:28:04 +02002792 return fd;
Avi Kivityf17abe92007-02-21 19:28:04 +02002793}
2794
2795static long kvm_dev_ioctl(struct file *filp,
2796 unsigned int ioctl, unsigned long arg)
2797{
2798 void __user *argp = (void __user *)arg;
Avi Kivity07c45a32007-03-07 13:05:38 +02002799 long r = -EINVAL;
Avi Kivityf17abe92007-02-21 19:28:04 +02002800
2801 switch (ioctl) {
2802 case KVM_GET_API_VERSION:
Avi Kivityf0fe5102007-03-07 13:11:17 +02002803 r = -EINVAL;
2804 if (arg)
2805 goto out;
Avi Kivityf17abe92007-02-21 19:28:04 +02002806 r = KVM_API_VERSION;
2807 break;
2808 case KVM_CREATE_VM:
Avi Kivityf0fe5102007-03-07 13:11:17 +02002809 r = -EINVAL;
2810 if (arg)
2811 goto out;
Avi Kivityf17abe92007-02-21 19:28:04 +02002812 r = kvm_dev_ioctl_create_vm();
2813 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002814 case KVM_GET_MSR_INDEX_LIST: {
Al Viro2f366982007-02-09 16:38:35 +00002815 struct kvm_msr_list __user *user_msr_list = argp;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002816 struct kvm_msr_list msr_list;
2817 unsigned n;
2818
2819 r = -EFAULT;
2820 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
2821 goto out;
2822 n = msr_list.nmsrs;
Avi Kivity6f00e682007-01-26 00:56:40 -08002823 msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
Avi Kivity6aa8b732006-12-10 02:21:36 -08002824 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
2825 goto out;
2826 r = -E2BIG;
Michael Riepebf591b22006-12-22 01:05:36 -08002827 if (n < num_msrs_to_save)
Avi Kivity6aa8b732006-12-10 02:21:36 -08002828 goto out;
2829 r = -EFAULT;
2830 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
Michael Riepebf591b22006-12-22 01:05:36 -08002831 num_msrs_to_save * sizeof(u32)))
Avi Kivity6aa8b732006-12-10 02:21:36 -08002832 goto out;
Avi Kivity6f00e682007-01-26 00:56:40 -08002833 if (copy_to_user(user_msr_list->indices
2834 + num_msrs_to_save * sizeof(u32),
2835 &emulated_msrs,
2836 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
2837 goto out;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002838 r = 0;
Avi Kivitycc1d8952007-01-05 16:36:58 -08002839 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002840 }
Avi Kivity5d308f42007-03-01 17:56:20 +02002841 case KVM_CHECK_EXTENSION:
2842 /*
2843 * No extensions defined at present.
2844 */
2845 r = 0;
2846 break;
Avi Kivity07c45a32007-03-07 13:05:38 +02002847 case KVM_GET_VCPU_MMAP_SIZE:
2848 r = -EINVAL;
2849 if (arg)
2850 goto out;
Avi Kivity039576c2007-03-20 12:46:50 +02002851 r = 2 * PAGE_SIZE;
Avi Kivity07c45a32007-03-07 13:05:38 +02002852 break;
Avi Kivity6aa8b732006-12-10 02:21:36 -08002853 default:
2854 ;
2855 }
2856out:
2857 return r;
2858}
2859
Avi Kivity6aa8b732006-12-10 02:21:36 -08002860static struct file_operations kvm_chardev_ops = {
2861 .open = kvm_dev_open,
2862 .release = kvm_dev_release,
2863 .unlocked_ioctl = kvm_dev_ioctl,
2864 .compat_ioctl = kvm_dev_ioctl,
Avi Kivity6aa8b732006-12-10 02:21:36 -08002865};
2866
2867static struct miscdevice kvm_dev = {
Avi Kivitybbe44322007-03-04 13:27:36 +02002868 KVM_MINOR,
Avi Kivity6aa8b732006-12-10 02:21:36 -08002869 "kvm",
2870 &kvm_chardev_ops,
2871};
2872
Avi Kivity774c47f2007-02-12 00:54:47 -08002873/*
2874 * Make sure that a cpu that is being hot-unplugged does not have any vcpus
2875 * cached on it.
2876 */
2877static void decache_vcpus_on_cpu(int cpu)
2878{
2879 struct kvm *vm;
2880 struct kvm_vcpu *vcpu;
2881 int i;
2882
2883 spin_lock(&kvm_lock);
Shaohua Li11ec2802007-07-23 14:51:37 +08002884 list_for_each_entry(vm, &vm_list, vm_list)
Avi Kivity774c47f2007-02-12 00:54:47 -08002885 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
Rusty Russellfb3f0f52007-07-27 17:16:56 +10002886 vcpu = vm->vcpus[i];
2887 if (!vcpu)
2888 continue;
Avi Kivity774c47f2007-02-12 00:54:47 -08002889 /*
2890 * If the vcpu is locked, then it is running on some
2891 * other cpu and therefore it is not cached on the
2892 * cpu in question.
2893 *
2894 * If it's not locked, check the last cpu it executed
2895 * on.
2896 */
2897 if (mutex_trylock(&vcpu->mutex)) {
2898 if (vcpu->cpu == cpu) {
2899 kvm_arch_ops->vcpu_decache(vcpu);
2900 vcpu->cpu = -1;
2901 }
2902 mutex_unlock(&vcpu->mutex);
2903 }
2904 }
2905 spin_unlock(&kvm_lock);
2906}
2907
Avi Kivity1b6c0162007-05-24 13:03:52 +03002908static void hardware_enable(void *junk)
2909{
2910 int cpu = raw_smp_processor_id();
2911
2912 if (cpu_isset(cpu, cpus_hardware_enabled))
2913 return;
2914 cpu_set(cpu, cpus_hardware_enabled);
2915 kvm_arch_ops->hardware_enable(NULL);
2916}
2917
2918static void hardware_disable(void *junk)
2919{
2920 int cpu = raw_smp_processor_id();
2921
2922 if (!cpu_isset(cpu, cpus_hardware_enabled))
2923 return;
2924 cpu_clear(cpu, cpus_hardware_enabled);
2925 decache_vcpus_on_cpu(cpu);
2926 kvm_arch_ops->hardware_disable(NULL);
2927}
2928
Avi Kivity774c47f2007-02-12 00:54:47 -08002929static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
2930 void *v)
2931{
2932 int cpu = (long)v;
2933
2934 switch (val) {
Avi Kivitycec9ad22007-05-24 13:11:41 +03002935 case CPU_DYING:
2936 case CPU_DYING_FROZEN:
Avi Kivity6ec8a852007-08-19 15:57:26 +03002937 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
2938 cpu);
2939 hardware_disable(NULL);
2940 break;
Avi Kivity774c47f2007-02-12 00:54:47 -08002941 case CPU_UP_CANCELED:
Rafael J. Wysocki8bb78442007-05-09 02:35:10 -07002942 case CPU_UP_CANCELED_FROZEN:
Jeremy Katz43934a32007-02-19 14:37:46 +02002943 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
2944 cpu);
Avi Kivity1b6c0162007-05-24 13:03:52 +03002945 smp_call_function_single(cpu, hardware_disable, NULL, 0, 1);
Avi Kivity774c47f2007-02-12 00:54:47 -08002946 break;
Jeremy Katz43934a32007-02-19 14:37:46 +02002947 case CPU_ONLINE:
Rafael J. Wysocki8bb78442007-05-09 02:35:10 -07002948 case CPU_ONLINE_FROZEN:
Jeremy Katz43934a32007-02-19 14:37:46 +02002949 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
2950 cpu);
Avi Kivity1b6c0162007-05-24 13:03:52 +03002951 smp_call_function_single(cpu, hardware_enable, NULL, 0, 1);
Avi Kivity774c47f2007-02-12 00:54:47 -08002952 break;
2953 }
2954 return NOTIFY_OK;
2955}
2956
Rusty Russell9a2b85c2007-07-17 23:17:55 +10002957static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
2958 void *v)
2959{
2960 if (val == SYS_RESTART) {
2961 /*
2962 * Some (well, at least mine) BIOSes hang on reboot if
2963 * in vmx root mode.
2964 */
2965 printk(KERN_INFO "kvm: exiting hardware virtualization\n");
2966 on_each_cpu(hardware_disable, NULL, 0, 1);
2967 }
2968 return NOTIFY_OK;
2969}
2970
2971static struct notifier_block kvm_reboot_notifier = {
2972 .notifier_call = kvm_reboot,
2973 .priority = 0,
2974};
2975
Gregory Haskins2eeb2e92007-05-31 14:08:53 -04002976void kvm_io_bus_init(struct kvm_io_bus *bus)
2977{
2978 memset(bus, 0, sizeof(*bus));
2979}
2980
2981void kvm_io_bus_destroy(struct kvm_io_bus *bus)
2982{
2983 int i;
2984
2985 for (i = 0; i < bus->dev_count; i++) {
2986 struct kvm_io_device *pos = bus->devs[i];
2987
2988 kvm_iodevice_destructor(pos);
2989 }
2990}
2991
2992struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr)
2993{
2994 int i;
2995
2996 for (i = 0; i < bus->dev_count; i++) {
2997 struct kvm_io_device *pos = bus->devs[i];
2998
2999 if (pos->in_range(pos, addr))
3000 return pos;
3001 }
3002
3003 return NULL;
3004}
3005
3006void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev)
3007{
3008 BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1));
3009
3010 bus->devs[bus->dev_count++] = dev;
3011}
3012
Avi Kivity774c47f2007-02-12 00:54:47 -08003013static struct notifier_block kvm_cpu_notifier = {
3014 .notifier_call = kvm_cpu_hotplug,
3015 .priority = 20, /* must be > scheduler priority */
3016};
3017
Avi Kivity1165f5f2007-04-19 17:27:43 +03003018static u64 stat_get(void *_offset)
3019{
3020 unsigned offset = (long)_offset;
3021 u64 total = 0;
3022 struct kvm *kvm;
3023 struct kvm_vcpu *vcpu;
3024 int i;
3025
3026 spin_lock(&kvm_lock);
3027 list_for_each_entry(kvm, &vm_list, vm_list)
3028 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
Rusty Russellfb3f0f52007-07-27 17:16:56 +10003029 vcpu = kvm->vcpus[i];
3030 if (vcpu)
3031 total += *(u32 *)((void *)vcpu + offset);
Avi Kivity1165f5f2007-04-19 17:27:43 +03003032 }
3033 spin_unlock(&kvm_lock);
3034 return total;
3035}
3036
3037static void stat_set(void *offset, u64 val)
3038{
3039}
3040
3041DEFINE_SIMPLE_ATTRIBUTE(stat_fops, stat_get, stat_set, "%llu\n");
3042
Avi Kivity6aa8b732006-12-10 02:21:36 -08003043static __init void kvm_init_debug(void)
3044{
3045 struct kvm_stats_debugfs_item *p;
3046
Al Viro8b6d44c2007-02-09 16:38:40 +00003047 debugfs_dir = debugfs_create_dir("kvm", NULL);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003048 for (p = debugfs_entries; p->name; ++p)
Avi Kivity1165f5f2007-04-19 17:27:43 +03003049 p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir,
3050 (void *)(long)p->offset,
3051 &stat_fops);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003052}
3053
3054static void kvm_exit_debug(void)
3055{
3056 struct kvm_stats_debugfs_item *p;
3057
3058 for (p = debugfs_entries; p->name; ++p)
3059 debugfs_remove(p->dentry);
3060 debugfs_remove(debugfs_dir);
3061}
3062
Avi Kivity59ae6c62007-02-12 00:54:48 -08003063static int kvm_suspend(struct sys_device *dev, pm_message_t state)
3064{
Avi Kivity4267c412007-05-24 13:09:41 +03003065 hardware_disable(NULL);
Avi Kivity59ae6c62007-02-12 00:54:48 -08003066 return 0;
3067}
3068
3069static int kvm_resume(struct sys_device *dev)
3070{
Avi Kivity4267c412007-05-24 13:09:41 +03003071 hardware_enable(NULL);
Avi Kivity59ae6c62007-02-12 00:54:48 -08003072 return 0;
3073}
3074
3075static struct sysdev_class kvm_sysdev_class = {
3076 set_kset_name("kvm"),
3077 .suspend = kvm_suspend,
3078 .resume = kvm_resume,
3079};
3080
3081static struct sys_device kvm_sysdev = {
3082 .id = 0,
3083 .cls = &kvm_sysdev_class,
3084};
3085
Avi Kivity6aa8b732006-12-10 02:21:36 -08003086hpa_t bad_page_address;
3087
Avi Kivity15ad7142007-07-11 18:17:21 +03003088static inline
3089struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
3090{
3091 return container_of(pn, struct kvm_vcpu, preempt_notifier);
3092}
3093
3094static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
3095{
3096 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
3097
3098 kvm_arch_ops->vcpu_load(vcpu, cpu);
3099}
3100
3101static void kvm_sched_out(struct preempt_notifier *pn,
3102 struct task_struct *next)
3103{
3104 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
3105
3106 kvm_arch_ops->vcpu_put(vcpu);
3107}
3108
Rusty Russellc16f8622007-07-30 21:12:19 +10003109int kvm_init_arch(struct kvm_arch_ops *ops, unsigned int vcpu_size,
3110 struct module *module)
Avi Kivity6aa8b732006-12-10 02:21:36 -08003111{
3112 int r;
3113
Yoshimi Ichiyanagi09db28b2006-12-29 16:49:41 -08003114 if (kvm_arch_ops) {
3115 printk(KERN_ERR "kvm: already loaded the other module\n");
3116 return -EEXIST;
3117 }
3118
Yoshimi Ichiyanagie097f352007-01-05 16:36:24 -08003119 if (!ops->cpu_has_kvm_support()) {
Avi Kivity6aa8b732006-12-10 02:21:36 -08003120 printk(KERN_ERR "kvm: no hardware support\n");
3121 return -EOPNOTSUPP;
3122 }
Yoshimi Ichiyanagie097f352007-01-05 16:36:24 -08003123 if (ops->disabled_by_bios()) {
Avi Kivity6aa8b732006-12-10 02:21:36 -08003124 printk(KERN_ERR "kvm: disabled by bios\n");
3125 return -EOPNOTSUPP;
3126 }
3127
Yoshimi Ichiyanagie097f352007-01-05 16:36:24 -08003128 kvm_arch_ops = ops;
3129
Avi Kivity6aa8b732006-12-10 02:21:36 -08003130 r = kvm_arch_ops->hardware_setup();
3131 if (r < 0)
Avi Kivityca45aaa2007-03-01 19:21:03 +02003132 goto out;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003133
Avi Kivity1b6c0162007-05-24 13:03:52 +03003134 on_each_cpu(hardware_enable, NULL, 0, 1);
Avi Kivity774c47f2007-02-12 00:54:47 -08003135 r = register_cpu_notifier(&kvm_cpu_notifier);
3136 if (r)
3137 goto out_free_1;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003138 register_reboot_notifier(&kvm_reboot_notifier);
3139
Avi Kivity59ae6c62007-02-12 00:54:48 -08003140 r = sysdev_class_register(&kvm_sysdev_class);
3141 if (r)
3142 goto out_free_2;
3143
3144 r = sysdev_register(&kvm_sysdev);
3145 if (r)
3146 goto out_free_3;
3147
Rusty Russellc16f8622007-07-30 21:12:19 +10003148 /* A kmem cache lets us meet the alignment requirements of fx_save. */
3149 kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size,
3150 __alignof__(struct kvm_vcpu), 0, 0);
3151 if (!kvm_vcpu_cache) {
3152 r = -ENOMEM;
3153 goto out_free_4;
3154 }
3155
Avi Kivity6aa8b732006-12-10 02:21:36 -08003156 kvm_chardev_ops.owner = module;
3157
3158 r = misc_register(&kvm_dev);
3159 if (r) {
3160 printk (KERN_ERR "kvm: misc device register failed\n");
3161 goto out_free;
3162 }
3163
Avi Kivity15ad7142007-07-11 18:17:21 +03003164 kvm_preempt_ops.sched_in = kvm_sched_in;
3165 kvm_preempt_ops.sched_out = kvm_sched_out;
3166
Avi Kivity6aa8b732006-12-10 02:21:36 -08003167 return r;
3168
3169out_free:
Rusty Russellc16f8622007-07-30 21:12:19 +10003170 kmem_cache_destroy(kvm_vcpu_cache);
3171out_free_4:
Avi Kivity59ae6c62007-02-12 00:54:48 -08003172 sysdev_unregister(&kvm_sysdev);
3173out_free_3:
3174 sysdev_class_unregister(&kvm_sysdev_class);
3175out_free_2:
Avi Kivity6aa8b732006-12-10 02:21:36 -08003176 unregister_reboot_notifier(&kvm_reboot_notifier);
Avi Kivity774c47f2007-02-12 00:54:47 -08003177 unregister_cpu_notifier(&kvm_cpu_notifier);
3178out_free_1:
Avi Kivity1b6c0162007-05-24 13:03:52 +03003179 on_each_cpu(hardware_disable, NULL, 0, 1);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003180 kvm_arch_ops->hardware_unsetup();
Avi Kivityca45aaa2007-03-01 19:21:03 +02003181out:
3182 kvm_arch_ops = NULL;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003183 return r;
3184}
3185
3186void kvm_exit_arch(void)
3187{
3188 misc_deregister(&kvm_dev);
Rusty Russellc16f8622007-07-30 21:12:19 +10003189 kmem_cache_destroy(kvm_vcpu_cache);
Avi Kivity59ae6c62007-02-12 00:54:48 -08003190 sysdev_unregister(&kvm_sysdev);
3191 sysdev_class_unregister(&kvm_sysdev_class);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003192 unregister_reboot_notifier(&kvm_reboot_notifier);
Avi Kivity59ae6c62007-02-12 00:54:48 -08003193 unregister_cpu_notifier(&kvm_cpu_notifier);
Avi Kivity1b6c0162007-05-24 13:03:52 +03003194 on_each_cpu(hardware_disable, NULL, 0, 1);
Avi Kivity6aa8b732006-12-10 02:21:36 -08003195 kvm_arch_ops->hardware_unsetup();
Yoshimi Ichiyanagi09db28b2006-12-29 16:49:41 -08003196 kvm_arch_ops = NULL;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003197}
3198
3199static __init int kvm_init(void)
3200{
3201 static struct page *bad_page;
Avi Kivity37e29d92007-02-20 14:07:37 +02003202 int r;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003203
Avi Kivityb5a33a72007-04-15 16:31:09 +03003204 r = kvm_mmu_module_init();
3205 if (r)
3206 goto out4;
3207
Avi Kivity6aa8b732006-12-10 02:21:36 -08003208 kvm_init_debug();
3209
Michael Riepebf591b22006-12-22 01:05:36 -08003210 kvm_init_msr_list();
3211
Avi Kivity6aa8b732006-12-10 02:21:36 -08003212 if ((bad_page = alloc_page(GFP_KERNEL)) == NULL) {
3213 r = -ENOMEM;
3214 goto out;
3215 }
3216
3217 bad_page_address = page_to_pfn(bad_page) << PAGE_SHIFT;
3218 memset(__va(bad_page_address), 0, PAGE_SIZE);
3219
Avi Kivity58e690e2007-02-26 16:29:43 +02003220 return 0;
Avi Kivity6aa8b732006-12-10 02:21:36 -08003221
3222out:
3223 kvm_exit_debug();
Avi Kivityb5a33a72007-04-15 16:31:09 +03003224 kvm_mmu_module_exit();
3225out4:
Avi Kivity6aa8b732006-12-10 02:21:36 -08003226 return r;
3227}
3228
3229static __exit void kvm_exit(void)
3230{
3231 kvm_exit_debug();
3232 __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT));
Avi Kivityb5a33a72007-04-15 16:31:09 +03003233 kvm_mmu_module_exit();
Avi Kivity6aa8b732006-12-10 02:21:36 -08003234}
3235
3236module_init(kvm_init)
3237module_exit(kvm_exit)
3238
3239EXPORT_SYMBOL_GPL(kvm_init_arch);
3240EXPORT_SYMBOL_GPL(kvm_exit_arch);