blob: 20fefac576b7f5c5a6099f2ef69227c6bed8f7e3 [file] [log] [blame]
Jun Nakajima1321c762011-03-04 17:17:45 -08001/*
2 * QEMU KVM support
3 *
4 * Copyright IBM, Corp. 2008
5 * Red Hat, Inc. 2008
6 *
7 * Authors:
8 * Anthony Liguori <aliguori@us.ibm.com>
9 * Glauber Costa <gcosta@redhat.com>
10 *
11 * This work is licensed under the terms of the GNU GPL, version 2 or later.
12 * See the COPYING file in the top-level directory.
13 *
14 */
15
16#include <sys/types.h>
17#include <sys/ioctl.h>
18#include <sys/mman.h>
19#include <stdarg.h>
20
21#include <linux/kvm.h>
22
David 'Digit' Turner9b3a4b02014-01-23 00:52:54 +010023#include "cpu.h"
Jun Nakajima1321c762011-03-04 17:17:45 -080024#include "qemu-common.h"
David 'Digit' Turner34c48ff2013-12-15 00:25:03 +010025#include "sysemu/sysemu.h"
Jun Nakajima1321c762011-03-04 17:17:45 -080026#include "hw/hw.h"
David 'Digit' Turner852088c2013-12-14 23:04:12 +010027#include "exec/gdbstub.h"
David 'Digit' Turner34c48ff2013-12-15 00:25:03 +010028#include "sysemu/kvm.h"
Jun Nakajima1321c762011-03-04 17:17:45 -080029
30/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
31#define PAGE_SIZE TARGET_PAGE_SIZE
32
33//#define DEBUG_KVM
34
35#ifdef DEBUG_KVM
36#define dprintf(fmt, ...) \
37 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
38#else
39#define dprintf(fmt, ...) \
40 do { } while (0)
41#endif
42
43typedef struct KVMSlot
44{
David 'Digit' Turnerbcde1092014-01-09 23:19:19 +010045 hwaddr start_addr;
Jun Nakajima1321c762011-03-04 17:17:45 -080046 ram_addr_t memory_size;
47 ram_addr_t phys_offset;
48 int slot;
49 int flags;
50} KVMSlot;
51
52typedef struct kvm_dirty_log KVMDirtyLog;
53
54int kvm_allowed = 0;
55
56struct KVMState
57{
58 KVMSlot slots[32];
59 int fd;
60 int vmfd;
61 int coalesced_mmio;
62 int broken_set_mem_region;
63 int migration_log;
64#ifdef KVM_CAP_SET_GUEST_DEBUG
65 struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
66#endif
67};
68
69static KVMState *kvm_state;
70
71static KVMSlot *kvm_alloc_slot(KVMState *s)
72{
73 int i;
74
75 for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
76 /* KVM private memory slots */
77 if (i >= 8 && i < 12)
78 continue;
79 if (s->slots[i].memory_size == 0)
80 return &s->slots[i];
81 }
82
83 fprintf(stderr, "%s: no free slot available\n", __func__);
84 abort();
85}
86
87static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
David 'Digit' Turnerbcde1092014-01-09 23:19:19 +010088 hwaddr start_addr,
89 hwaddr end_addr)
Jun Nakajima1321c762011-03-04 17:17:45 -080090{
91 int i;
92
93 for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
94 KVMSlot *mem = &s->slots[i];
95
96 if (start_addr == mem->start_addr &&
97 end_addr == mem->start_addr + mem->memory_size) {
98 return mem;
99 }
100 }
101
102 return NULL;
103}
104
105/*
106 * Find overlapping slot with lowest start address
107 */
108static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
David 'Digit' Turnerbcde1092014-01-09 23:19:19 +0100109 hwaddr start_addr,
110 hwaddr end_addr)
Jun Nakajima1321c762011-03-04 17:17:45 -0800111{
112 KVMSlot *found = NULL;
113 int i;
114
115 for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
116 KVMSlot *mem = &s->slots[i];
117
118 if (mem->memory_size == 0 ||
119 (found && found->start_addr < mem->start_addr)) {
120 continue;
121 }
122
123 if (end_addr > mem->start_addr &&
124 start_addr < mem->start_addr + mem->memory_size) {
125 found = mem;
126 }
127 }
128
129 return found;
130}
131
132static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
133{
134 struct kvm_userspace_memory_region mem;
135
136 mem.slot = slot->slot;
137 mem.guest_phys_addr = slot->start_addr;
138 mem.memory_size = slot->memory_size;
139 mem.userspace_addr = (unsigned long)qemu_get_ram_ptr(slot->phys_offset);
140 mem.flags = slot->flags;
141 if (s->migration_log) {
142 mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
143 }
144 return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
145}
146
147
David 'Digit' Turnere2678e12014-01-16 15:56:43 +0100148int kvm_init_vcpu(CPUOldState *env)
Jun Nakajima1321c762011-03-04 17:17:45 -0800149{
150 KVMState *s = kvm_state;
151 long mmap_size;
152 int ret;
153
154 dprintf("kvm_init_vcpu\n");
155
156 ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
157 if (ret < 0) {
158 dprintf("kvm_create_vcpu failed\n");
159 goto err;
160 }
161
162 env->kvm_fd = ret;
163 env->kvm_state = s;
164
165 mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
166 if (mmap_size < 0) {
167 dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n");
168 goto err;
169 }
170
171 env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
172 env->kvm_fd, 0);
173 if (env->kvm_run == MAP_FAILED) {
174 ret = -errno;
175 dprintf("mmap'ing vcpu state failed\n");
176 goto err;
177 }
178
179 ret = kvm_arch_init_vcpu(env);
180
181err:
182 return ret;
183}
184
David 'Digit' Turnere2678e12014-01-16 15:56:43 +0100185int kvm_put_mp_state(CPUOldState *env)
Jun Nakajima1321c762011-03-04 17:17:45 -0800186{
187 struct kvm_mp_state mp_state = { .mp_state = env->mp_state };
188
189 return kvm_vcpu_ioctl(env, KVM_SET_MP_STATE, &mp_state);
190}
191
David 'Digit' Turnere2678e12014-01-16 15:56:43 +0100192int kvm_get_mp_state(CPUOldState *env)
Jun Nakajima1321c762011-03-04 17:17:45 -0800193{
194 struct kvm_mp_state mp_state;
195 int ret;
196
197 ret = kvm_vcpu_ioctl(env, KVM_GET_MP_STATE, &mp_state);
198 if (ret < 0) {
199 return ret;
200 }
201 env->mp_state = mp_state.mp_state;
202 return 0;
203}
204
205int kvm_sync_vcpus(void)
206{
David 'Digit' Turnere2678e12014-01-16 15:56:43 +0100207 CPUOldState *env;
Jun Nakajima1321c762011-03-04 17:17:45 -0800208
David 'Digit' Turner4ab12252014-03-24 11:29:53 +0100209 CPU_FOREACH(env) {
210 int ret = kvm_arch_put_registers(env);
Jun Nakajima1321c762011-03-04 17:17:45 -0800211 if (ret)
212 return ret;
213 }
214
215 return 0;
216}
217
218/*
219 * dirty pages logging control
220 */
David 'Digit' Turnerbcde1092014-01-09 23:19:19 +0100221static int kvm_dirty_pages_log_change(hwaddr phys_addr,
Jun Nakajima1321c762011-03-04 17:17:45 -0800222 ram_addr_t size, int flags, int mask)
223{
224 KVMState *s = kvm_state;
225 KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
226 int old_flags;
227
228 if (mem == NULL) {
229 fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
David 'Digit' Turnera2c14f92014-02-04 01:02:30 +0100230 TARGET_FMT_plx "\n", __func__, (hwaddr)phys_addr,
231 (hwaddr)(phys_addr + size - 1));
Jun Nakajima1321c762011-03-04 17:17:45 -0800232 return -EINVAL;
233 }
234
235 old_flags = mem->flags;
236
237 flags = (mem->flags & ~mask) | flags;
238 mem->flags = flags;
239
240 /* If nothing changed effectively, no need to issue ioctl */
241 if (s->migration_log) {
242 flags |= KVM_MEM_LOG_DIRTY_PAGES;
243 }
244 if (flags == old_flags) {
245 return 0;
246 }
247
248 return kvm_set_user_memory_region(s, mem);
249}
250
David 'Digit' Turnerbcde1092014-01-09 23:19:19 +0100251int kvm_log_start(hwaddr phys_addr, ram_addr_t size)
Jun Nakajima1321c762011-03-04 17:17:45 -0800252{
253 return kvm_dirty_pages_log_change(phys_addr, size,
254 KVM_MEM_LOG_DIRTY_PAGES,
255 KVM_MEM_LOG_DIRTY_PAGES);
256}
257
David 'Digit' Turnerbcde1092014-01-09 23:19:19 +0100258int kvm_log_stop(hwaddr phys_addr, ram_addr_t size)
Jun Nakajima1321c762011-03-04 17:17:45 -0800259{
260 return kvm_dirty_pages_log_change(phys_addr, size,
261 0,
262 KVM_MEM_LOG_DIRTY_PAGES);
263}
264
265int kvm_set_migration_log(int enable)
266{
267 KVMState *s = kvm_state;
268 KVMSlot *mem;
269 int i, err;
270
271 s->migration_log = enable;
272
273 for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
274 mem = &s->slots[i];
275
276 if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
277 continue;
278 }
279 err = kvm_set_user_memory_region(s, mem);
280 if (err) {
281 return err;
282 }
283 }
284 return 0;
285}
286
287/**
288 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
289 * This function updates qemu's dirty bitmap using cpu_physical_memory_set_dirty().
290 * This means all bits are set to dirty.
291 *
292 * @start_add: start of logged region.
293 * @end_addr: end of logged region.
294 */
David 'Digit' Turnerbcde1092014-01-09 23:19:19 +0100295int kvm_physical_sync_dirty_bitmap(hwaddr start_addr,
296 hwaddr end_addr)
Jun Nakajima1321c762011-03-04 17:17:45 -0800297{
298 KVMState *s = kvm_state;
299 unsigned long size, allocated_size = 0;
David 'Digit' Turnerbcde1092014-01-09 23:19:19 +0100300 hwaddr phys_addr;
Jun Nakajima1321c762011-03-04 17:17:45 -0800301 ram_addr_t addr;
302 KVMDirtyLog d;
303 KVMSlot *mem;
304 int ret = 0;
305
306 d.dirty_bitmap = NULL;
307 while (start_addr < end_addr) {
308 mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
309 if (mem == NULL) {
310 break;
311 }
312
313 size = ((mem->memory_size >> TARGET_PAGE_BITS) + 7) / 8;
314 if (!d.dirty_bitmap) {
David 'Digit' Turneraa8236d2014-01-10 17:02:29 +0100315 d.dirty_bitmap = g_malloc(size);
Jun Nakajima1321c762011-03-04 17:17:45 -0800316 } else if (size > allocated_size) {
David 'Digit' Turneraa8236d2014-01-10 17:02:29 +0100317 d.dirty_bitmap = g_realloc(d.dirty_bitmap, size);
Jun Nakajima1321c762011-03-04 17:17:45 -0800318 }
319 allocated_size = size;
320 memset(d.dirty_bitmap, 0, allocated_size);
321
322 d.slot = mem->slot;
323
324 if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
325 dprintf("ioctl failed %d\n", errno);
326 ret = -1;
327 break;
328 }
329
330 for (phys_addr = mem->start_addr, addr = mem->phys_offset;
331 phys_addr < mem->start_addr + mem->memory_size;
332 phys_addr += TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
333 unsigned long *bitmap = (unsigned long *)d.dirty_bitmap;
334 unsigned nr = (phys_addr - mem->start_addr) >> TARGET_PAGE_BITS;
335 unsigned word = nr / (sizeof(*bitmap) * 8);
336 unsigned bit = nr % (sizeof(*bitmap) * 8);
337
338 if ((bitmap[word] >> bit) & 1) {
339 cpu_physical_memory_set_dirty(addr);
340 }
341 }
342 start_addr = phys_addr;
343 }
David 'Digit' Turneraa8236d2014-01-10 17:02:29 +0100344 g_free(d.dirty_bitmap);
Jun Nakajima1321c762011-03-04 17:17:45 -0800345
346 return ret;
347}
348
David 'Digit' Turnerbcde1092014-01-09 23:19:19 +0100349int kvm_coalesce_mmio_region(hwaddr start, ram_addr_t size)
Jun Nakajima1321c762011-03-04 17:17:45 -0800350{
351 int ret = -ENOSYS;
352#ifdef KVM_CAP_COALESCED_MMIO
353 KVMState *s = kvm_state;
354
355 if (s->coalesced_mmio) {
356 struct kvm_coalesced_mmio_zone zone;
357
358 zone.addr = start;
359 zone.size = size;
360
361 ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
362 }
363#endif
364
365 return ret;
366}
367
David 'Digit' Turnerbcde1092014-01-09 23:19:19 +0100368int kvm_uncoalesce_mmio_region(hwaddr start, ram_addr_t size)
Jun Nakajima1321c762011-03-04 17:17:45 -0800369{
370 int ret = -ENOSYS;
371#ifdef KVM_CAP_COALESCED_MMIO
372 KVMState *s = kvm_state;
373
374 if (s->coalesced_mmio) {
375 struct kvm_coalesced_mmio_zone zone;
376
377 zone.addr = start;
378 zone.size = size;
379
380 ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
381 }
382#endif
383
384 return ret;
385}
386
387int kvm_check_extension(KVMState *s, unsigned int extension)
388{
389 int ret;
390
391 ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
392 if (ret < 0) {
393 ret = 0;
394 }
395
396 return ret;
397}
398
/* Reset handler: push register state of all CPUs; @opaque is unused. */
static void kvm_reset_vcpus(void *opaque)
{
    (void)opaque;
    (void)kvm_sync_vcpus();
}
403
404int kvm_init(int smp_cpus)
405{
406 static const char upgrade_note[] =
407 "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
408 "(see http://sourceforge.net/projects/kvm).\n";
409 KVMState *s;
410 int ret;
411 int i;
412
413 if (smp_cpus > 1) {
414 fprintf(stderr, "No SMP KVM support, use '-smp 1'\n");
415 return -EINVAL;
416 }
417
David 'Digit' Turneraa8236d2014-01-10 17:02:29 +0100418 s = g_malloc0(sizeof(KVMState));
Jun Nakajima1321c762011-03-04 17:17:45 -0800419
420#ifdef KVM_CAP_SET_GUEST_DEBUG
421 QTAILQ_INIT(&s->kvm_sw_breakpoints);
422#endif
423 for (i = 0; i < ARRAY_SIZE(s->slots); i++)
424 s->slots[i].slot = i;
425
426 s->vmfd = -1;
427 s->fd = open("/dev/kvm", O_RDWR);
428 if (s->fd == -1) {
Jun Nakajima1321c762011-03-04 17:17:45 -0800429 ret = -errno;
Andy McFadden7876c702012-03-15 13:17:57 -0700430 fprintf(stderr, "Could not access KVM kernel module: %m\n");
Jun Nakajima1321c762011-03-04 17:17:45 -0800431 goto err;
432 }
433
434 ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
435 if (ret < KVM_API_VERSION) {
436 if (ret > 0)
437 ret = -EINVAL;
438 fprintf(stderr, "kvm version too old\n");
439 goto err;
440 }
441
442 if (ret > KVM_API_VERSION) {
443 ret = -EINVAL;
444 fprintf(stderr, "kvm version not supported\n");
445 goto err;
446 }
447
Tom Knych1d0e5942013-01-18 09:52:43 -0800448 do {
449 s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
450 } while (s->vmfd < 0 && (EINTR == errno || EAGAIN == errno));
451
Andy McFadden7876c702012-03-15 13:17:57 -0700452 if (s->vmfd < 0) {
453 ret = -errno;
Tom Knych1d0e5942013-01-18 09:52:43 -0800454 fprintf(stderr, "ioctl(KVM_CREATE_VM) failed: %d %s\n", errno, strerror(errno));
Jun Nakajima1321c762011-03-04 17:17:45 -0800455 goto err;
Andy McFadden7876c702012-03-15 13:17:57 -0700456 }
Jun Nakajima1321c762011-03-04 17:17:45 -0800457
458 /* initially, KVM allocated its own memory and we had to jump through
459 * hooks to make phys_ram_base point to this. Modern versions of KVM
460 * just use a user allocated buffer so we can use regular pages
461 * unmodified. Make sure we have a sufficiently modern version of KVM.
462 */
463 if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) {
464 ret = -EINVAL;
465 fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n%s",
466 upgrade_note);
467 goto err;
468 }
469
470 /* There was a nasty bug in < kvm-80 that prevents memory slots from being
471 * destroyed properly. Since we rely on this capability, refuse to work
472 * with any kernel without this capability. */
473 if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
474 ret = -EINVAL;
475
476 fprintf(stderr,
477 "KVM kernel module broken (DESTROY_MEMORY_REGION).\n%s",
478 upgrade_note);
479 goto err;
480 }
481
482#ifdef KVM_CAP_COALESCED_MMIO
483 s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
484#else
485 s->coalesced_mmio = 0;
486#endif
487
488 s->broken_set_mem_region = 1;
489#ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS
490 ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
491 if (ret > 0) {
492 s->broken_set_mem_region = 0;
493 }
494#endif
495
496 ret = kvm_arch_init(s, smp_cpus);
497 if (ret < 0)
498 goto err;
499
500 qemu_register_reset(kvm_reset_vcpus, INT_MAX, NULL);
501
502 kvm_state = s;
503
504 return 0;
505
506err:
507 if (s) {
508 if (s->vmfd != -1)
509 close(s->vmfd);
510 if (s->fd != -1)
511 close(s->fd);
512 }
David 'Digit' Turneraa8236d2014-01-10 17:02:29 +0100513 g_free(s);
Jun Nakajima1321c762011-03-04 17:17:45 -0800514
515 return ret;
516}
517
David 'Digit' Turnere2678e12014-01-16 15:56:43 +0100518static int kvm_handle_io(CPUOldState *env, uint16_t port, void *data,
Jun Nakajima1321c762011-03-04 17:17:45 -0800519 int direction, int size, uint32_t count)
520{
521 int i;
522 uint8_t *ptr = data;
523
524 for (i = 0; i < count; i++) {
525 if (direction == KVM_EXIT_IO_IN) {
526 switch (size) {
527 case 1:
528 stb_p(ptr, cpu_inb(port));
529 break;
530 case 2:
531 stw_p(ptr, cpu_inw(port));
532 break;
533 case 4:
534 stl_p(ptr, cpu_inl(port));
535 break;
536 }
537 } else {
538 switch (size) {
539 case 1:
540 cpu_outb(port, ldub_p(ptr));
541 break;
542 case 2:
543 cpu_outw(port, lduw_p(ptr));
544 break;
545 case 4:
546 cpu_outl(port, ldl_p(ptr));
547 break;
548 }
549 }
550
551 ptr += size;
552 }
553
554 return 1;
555}
556
David 'Digit' Turnere2678e12014-01-16 15:56:43 +0100557static void kvm_run_coalesced_mmio(CPUOldState *env, struct kvm_run *run)
Jun Nakajima1321c762011-03-04 17:17:45 -0800558{
559#ifdef KVM_CAP_COALESCED_MMIO
560 KVMState *s = kvm_state;
561 if (s->coalesced_mmio) {
562 struct kvm_coalesced_mmio_ring *ring;
563
564 ring = (void *)run + (s->coalesced_mmio * TARGET_PAGE_SIZE);
565 while (ring->first != ring->last) {
566 struct kvm_coalesced_mmio *ent;
567
568 ent = &ring->coalesced_mmio[ring->first];
569
570 cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
571 /* FIXME smp_wmb() */
572 ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
573 }
574 }
575#endif
576}
577
David 'Digit' Turnere2678e12014-01-16 15:56:43 +0100578int kvm_cpu_exec(CPUOldState *env)
Jun Nakajima1321c762011-03-04 17:17:45 -0800579{
580 struct kvm_run *run = env->kvm_run;
581 int ret;
582
583 dprintf("kvm_cpu_exec()\n");
584
585 do {
586 if (env->exit_request) {
587 dprintf("interrupt exit requested\n");
588 ret = 0;
589 break;
590 }
591
592 kvm_arch_pre_run(env, run);
Jun Nakajimabb0140b2011-05-27 18:24:21 -0700593 ret = kvm_arch_vcpu_run(env);
Jun Nakajima1321c762011-03-04 17:17:45 -0800594 kvm_arch_post_run(env, run);
595
596 if (ret == -EINTR || ret == -EAGAIN) {
597 dprintf("io window exit\n");
598 ret = 0;
599 break;
600 }
601
602 if (ret < 0) {
603 dprintf("kvm run failed %s\n", strerror(-ret));
604 abort();
605 }
606
607 kvm_run_coalesced_mmio(env, run);
608
609 ret = 0; /* exit loop */
610 switch (run->exit_reason) {
611 case KVM_EXIT_IO:
612 dprintf("handle_io\n");
613 ret = kvm_handle_io(env, run->io.port,
614 (uint8_t *)run + run->io.data_offset,
615 run->io.direction,
616 run->io.size,
617 run->io.count);
618 break;
619 case KVM_EXIT_MMIO:
620 dprintf("handle_mmio\n");
621 cpu_physical_memory_rw(run->mmio.phys_addr,
622 run->mmio.data,
623 run->mmio.len,
624 run->mmio.is_write);
625 ret = 1;
626 break;
627 case KVM_EXIT_IRQ_WINDOW_OPEN:
628 dprintf("irq_window_open\n");
629 break;
630 case KVM_EXIT_SHUTDOWN:
631 dprintf("shutdown\n");
632 qemu_system_reset_request();
633 ret = 1;
634 break;
635 case KVM_EXIT_UNKNOWN:
636 dprintf("kvm_exit_unknown\n");
637 break;
638 case KVM_EXIT_FAIL_ENTRY:
639 dprintf("kvm_exit_fail_entry\n");
640 break;
641 case KVM_EXIT_EXCEPTION:
642 dprintf("kvm_exit_exception\n");
643 break;
644 case KVM_EXIT_DEBUG:
645 dprintf("kvm_exit_debug\n");
646#ifdef KVM_CAP_SET_GUEST_DEBUG
647 if (kvm_arch_debug(&run->debug.arch)) {
648 gdb_set_stop_cpu(env);
649 vm_stop(EXCP_DEBUG);
650 env->exception_index = EXCP_DEBUG;
651 return 0;
652 }
653 /* re-enter, this exception was guest-internal */
654 ret = 1;
655#endif /* KVM_CAP_SET_GUEST_DEBUG */
656 break;
657 default:
658 dprintf("kvm_arch_handle_exit\n");
659 ret = kvm_arch_handle_exit(env, run);
660 break;
661 }
662 } while (ret > 0);
663
664 if (env->exit_request) {
665 env->exit_request = 0;
666 env->exception_index = EXCP_INTERRUPT;
667 }
668
669 return ret;
670}
671
David 'Digit' Turnerbcde1092014-01-09 23:19:19 +0100672void kvm_set_phys_mem(hwaddr start_addr,
Jun Nakajima1321c762011-03-04 17:17:45 -0800673 ram_addr_t size,
674 ram_addr_t phys_offset)
675{
676 KVMState *s = kvm_state;
677 ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
678 KVMSlot *mem, old;
679 int err;
680
681 if (start_addr & ~TARGET_PAGE_MASK) {
682 if (flags >= IO_MEM_UNASSIGNED) {
683 if (!kvm_lookup_overlapping_slot(s, start_addr,
684 start_addr + size)) {
685 return;
686 }
687 fprintf(stderr, "Unaligned split of a KVM memory slot\n");
688 } else {
689 fprintf(stderr, "Only page-aligned memory slots supported\n");
690 }
691 abort();
692 }
693
694 /* KVM does not support read-only slots */
695 phys_offset &= ~IO_MEM_ROM;
696
697 while (1) {
698 mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
699 if (!mem) {
700 break;
701 }
702
703 if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
704 (start_addr + size <= mem->start_addr + mem->memory_size) &&
705 (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
706 /* The new slot fits into the existing one and comes with
707 * identical parameters - nothing to be done. */
708 return;
709 }
710
711 old = *mem;
712
713 /* unregister the overlapping slot */
714 mem->memory_size = 0;
715 err = kvm_set_user_memory_region(s, mem);
716 if (err) {
717 fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
718 __func__, strerror(-err));
719 abort();
720 }
721
722 /* Workaround for older KVM versions: we can't join slots, even not by
723 * unregistering the previous ones and then registering the larger
724 * slot. We have to maintain the existing fragmentation. Sigh.
725 *
726 * This workaround assumes that the new slot starts at the same
727 * address as the first existing one. If not or if some overlapping
728 * slot comes around later, we will fail (not seen in practice so far)
729 * - and actually require a recent KVM version. */
730 if (s->broken_set_mem_region &&
731 old.start_addr == start_addr && old.memory_size < size &&
732 flags < IO_MEM_UNASSIGNED) {
733 mem = kvm_alloc_slot(s);
734 mem->memory_size = old.memory_size;
735 mem->start_addr = old.start_addr;
736 mem->phys_offset = old.phys_offset;
737 mem->flags = 0;
738
739 err = kvm_set_user_memory_region(s, mem);
740 if (err) {
741 fprintf(stderr, "%s: error updating slot: %s\n", __func__,
742 strerror(-err));
743 abort();
744 }
745
746 start_addr += old.memory_size;
747 phys_offset += old.memory_size;
748 size -= old.memory_size;
749 continue;
750 }
751
752 /* register prefix slot */
753 if (old.start_addr < start_addr) {
754 mem = kvm_alloc_slot(s);
755 mem->memory_size = start_addr - old.start_addr;
756 mem->start_addr = old.start_addr;
757 mem->phys_offset = old.phys_offset;
758 mem->flags = 0;
759
760 err = kvm_set_user_memory_region(s, mem);
761 if (err) {
762 fprintf(stderr, "%s: error registering prefix slot: %s\n",
763 __func__, strerror(-err));
764 abort();
765 }
766 }
767
768 /* register suffix slot */
769 if (old.start_addr + old.memory_size > start_addr + size) {
770 ram_addr_t size_delta;
771
772 mem = kvm_alloc_slot(s);
773 mem->start_addr = start_addr + size;
774 size_delta = mem->start_addr - old.start_addr;
775 mem->memory_size = old.memory_size - size_delta;
776 mem->phys_offset = old.phys_offset + size_delta;
777 mem->flags = 0;
778
779 err = kvm_set_user_memory_region(s, mem);
780 if (err) {
781 fprintf(stderr, "%s: error registering suffix slot: %s\n",
782 __func__, strerror(-err));
783 abort();
784 }
785 }
786 }
787
788 /* in case the KVM bug workaround already "consumed" the new slot */
789 if (!size)
790 return;
791
792 /* KVM does not need to know about this memory */
793 if (flags >= IO_MEM_UNASSIGNED)
794 return;
795
796 mem = kvm_alloc_slot(s);
797 mem->memory_size = size;
798 mem->start_addr = start_addr;
799 mem->phys_offset = phys_offset;
800 mem->flags = 0;
801
802 err = kvm_set_user_memory_region(s, mem);
803 if (err) {
804 fprintf(stderr, "%s: error registering slot: %s\n", __func__,
805 strerror(-err));
806 abort();
807 }
808}
809
810int kvm_ioctl(KVMState *s, int type, ...)
811{
812 int ret;
813 void *arg;
814 va_list ap;
815
816 va_start(ap, type);
817 arg = va_arg(ap, void *);
818 va_end(ap);
819
820 ret = ioctl(s->fd, type, arg);
821 if (ret == -1)
822 ret = -errno;
823
824 return ret;
825}
826
827int kvm_vm_ioctl(KVMState *s, int type, ...)
828{
829 int ret;
830 void *arg;
831 va_list ap;
832
833 va_start(ap, type);
834 arg = va_arg(ap, void *);
835 va_end(ap);
836
837 ret = ioctl(s->vmfd, type, arg);
838 if (ret == -1)
839 ret = -errno;
840
841 return ret;
842}
843
David 'Digit' Turnere2678e12014-01-16 15:56:43 +0100844int kvm_vcpu_ioctl(CPUOldState *env, int type, ...)
Jun Nakajima1321c762011-03-04 17:17:45 -0800845{
846 int ret;
847 void *arg;
848 va_list ap;
849
850 va_start(ap, type);
851 arg = va_arg(ap, void *);
852 va_end(ap);
853
854 ret = ioctl(env->kvm_fd, type, arg);
855 if (ret == -1)
856 ret = -errno;
857
858 return ret;
859}
860
/*
 * Report the kernel's answer for KVM_CAP_SYNC_MMU, or 0 when the
 * capability is unknown at build time.
 */
int kvm_has_sync_mmu(void)
{
#ifdef KVM_CAP_SYNC_MMU
    return kvm_check_extension(kvm_state, KVM_CAP_SYNC_MMU);
#else
    return 0;
#endif
}
871
/*
 * Prepare a guest RAM area for use with KVM.
 *
 * When the kernel does not report a synchronous MMU, the range is marked
 * MADV_DONTFORK.  If that fails, or MADV_DONTFORK is not available at
 * build time, the process exits with status 1.
 *
 * @start: start of the host memory area.
 * @size:  length of the area in bytes.
 */
void kvm_setup_guest_memory(void *start, size_t size)
{
    if (!kvm_has_sync_mmu()) {
#ifdef MADV_DONTFORK
        int ret = madvise(start, size, MADV_DONTFORK);

        if (ret) {
            /* name the call that actually failed */
            perror("madvise");
            exit(1);
        }
#else
        fprintf(stderr,
                "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
        exit(1);
#endif
    }
}
889
890#ifdef KVM_CAP_SET_GUEST_DEBUG
David 'Digit' Turnere2678e12014-01-16 15:56:43 +0100891struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUOldState *env,
Jun Nakajima1321c762011-03-04 17:17:45 -0800892 target_ulong pc)
893{
894 struct kvm_sw_breakpoint *bp;
895
896 QTAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
897 if (bp->pc == pc)
898 return bp;
899 }
900 return NULL;
901}
902
David 'Digit' Turnere2678e12014-01-16 15:56:43 +0100903int kvm_sw_breakpoints_active(CPUOldState *env)
Jun Nakajima1321c762011-03-04 17:17:45 -0800904{
905 return !QTAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
906}
907
David 'Digit' Turnere2678e12014-01-16 15:56:43 +0100908int kvm_update_guest_debug(CPUOldState *env, unsigned long reinject_trap)
Jun Nakajima1321c762011-03-04 17:17:45 -0800909{
910 struct kvm_guest_debug dbg;
911
912 dbg.control = 0;
913 if (env->singlestep_enabled)
914 dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
915
916 kvm_arch_update_guest_debug(env, &dbg);
917 dbg.control |= reinject_trap;
918
919 return kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg);
920}
921
David 'Digit' Turnere2678e12014-01-16 15:56:43 +0100922int kvm_insert_breakpoint(CPUOldState *current_env, target_ulong addr,
Jun Nakajima1321c762011-03-04 17:17:45 -0800923 target_ulong len, int type)
924{
925 struct kvm_sw_breakpoint *bp;
David 'Digit' Turnere2678e12014-01-16 15:56:43 +0100926 CPUOldState *env;
Jun Nakajima1321c762011-03-04 17:17:45 -0800927 int err;
928
929 if (type == GDB_BREAKPOINT_SW) {
930 bp = kvm_find_sw_breakpoint(current_env, addr);
931 if (bp) {
932 bp->use_count++;
933 return 0;
934 }
935
David 'Digit' Turneraa8236d2014-01-10 17:02:29 +0100936 bp = g_malloc(sizeof(struct kvm_sw_breakpoint));
Jun Nakajima1321c762011-03-04 17:17:45 -0800937 if (!bp)
938 return -ENOMEM;
939
940 bp->pc = addr;
941 bp->use_count = 1;
942 err = kvm_arch_insert_sw_breakpoint(current_env, bp);
943 if (err) {
944 free(bp);
945 return err;
946 }
947
948 QTAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
949 bp, entry);
950 } else {
951 err = kvm_arch_insert_hw_breakpoint(addr, len, type);
952 if (err)
953 return err;
954 }
955
David 'Digit' Turner4ab12252014-03-24 11:29:53 +0100956 CPU_FOREACH(env) {
Jun Nakajima1321c762011-03-04 17:17:45 -0800957 err = kvm_update_guest_debug(env, 0);
958 if (err)
959 return err;
960 }
961 return 0;
962}
963
David 'Digit' Turnere2678e12014-01-16 15:56:43 +0100964int kvm_remove_breakpoint(CPUOldState *current_env, target_ulong addr,
Jun Nakajima1321c762011-03-04 17:17:45 -0800965 target_ulong len, int type)
966{
967 struct kvm_sw_breakpoint *bp;
David 'Digit' Turnere2678e12014-01-16 15:56:43 +0100968 CPUOldState *env;
Jun Nakajima1321c762011-03-04 17:17:45 -0800969 int err;
970
971 if (type == GDB_BREAKPOINT_SW) {
972 bp = kvm_find_sw_breakpoint(current_env, addr);
973 if (!bp)
974 return -ENOENT;
975
976 if (bp->use_count > 1) {
977 bp->use_count--;
978 return 0;
979 }
980
981 err = kvm_arch_remove_sw_breakpoint(current_env, bp);
982 if (err)
983 return err;
984
985 QTAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
David 'Digit' Turneraa8236d2014-01-10 17:02:29 +0100986 g_free(bp);
Jun Nakajima1321c762011-03-04 17:17:45 -0800987 } else {
988 err = kvm_arch_remove_hw_breakpoint(addr, len, type);
989 if (err)
990 return err;
991 }
992
David 'Digit' Turner4ab12252014-03-24 11:29:53 +0100993 CPU_FOREACH(env) {
Jun Nakajima1321c762011-03-04 17:17:45 -0800994 err = kvm_update_guest_debug(env, 0);
995 if (err)
996 return err;
997 }
998 return 0;
999}
1000
David 'Digit' Turnere2678e12014-01-16 15:56:43 +01001001void kvm_remove_all_breakpoints(CPUOldState *current_env)
Jun Nakajima1321c762011-03-04 17:17:45 -08001002{
1003 struct kvm_sw_breakpoint *bp, *next;
1004 KVMState *s = current_env->kvm_state;
David 'Digit' Turnere2678e12014-01-16 15:56:43 +01001005 CPUOldState *env;
Jun Nakajima1321c762011-03-04 17:17:45 -08001006
1007 QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
1008 if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
1009 /* Try harder to find a CPU that currently sees the breakpoint. */
David 'Digit' Turner4ab12252014-03-24 11:29:53 +01001010 CPU_FOREACH(env) {
Jun Nakajima1321c762011-03-04 17:17:45 -08001011 if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
1012 break;
1013 }
1014 }
1015 }
1016 kvm_arch_remove_all_hw_breakpoints();
1017
David 'Digit' Turner4ab12252014-03-24 11:29:53 +01001018 CPU_FOREACH(env) {
Jun Nakajima1321c762011-03-04 17:17:45 -08001019 kvm_update_guest_debug(env, 0);
David 'Digit' Turner4ab12252014-03-24 11:29:53 +01001020 }
Jun Nakajima1321c762011-03-04 17:17:45 -08001021}
1022
1023#else /* !KVM_CAP_SET_GUEST_DEBUG */
1024
David 'Digit' Turnere2678e12014-01-16 15:56:43 +01001025int kvm_update_guest_debug(CPUOldState *env, unsigned long reinject_trap)
Jun Nakajima1321c762011-03-04 17:17:45 -08001026{
1027 return -EINVAL;
1028}
1029
David 'Digit' Turnere2678e12014-01-16 15:56:43 +01001030int kvm_insert_breakpoint(CPUOldState *current_env, target_ulong addr,
Jun Nakajima1321c762011-03-04 17:17:45 -08001031 target_ulong len, int type)
1032{
1033 return -EINVAL;
1034}
1035
David 'Digit' Turnere2678e12014-01-16 15:56:43 +01001036int kvm_remove_breakpoint(CPUOldState *current_env, target_ulong addr,
Jun Nakajima1321c762011-03-04 17:17:45 -08001037 target_ulong len, int type)
1038{
1039 return -EINVAL;
1040}
1041
David 'Digit' Turnere2678e12014-01-16 15:56:43 +01001042void kvm_remove_all_breakpoints(CPUOldState *current_env)
Jun Nakajima1321c762011-03-04 17:17:45 -08001043{
1044}
1045#endif /* !KVM_CAP_SET_GUEST_DEBUG */