/*
 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
 *
 * Authors:
 *    Paul Mackerras <paulus@au1.ibm.com>
 *    Alexander Graf <agraf@suse.de>
 *    Kevin Wolf <mail@kevin-wolf.de>
 *
 * Description: KVM functions specific to running on Book 3S
 * processors in hypervisor mode (specifically POWER7 and later).
 *
 * This file is derived from arch/powerpc/kvm/book3s.c,
 * by Alexander Graf <agraf@suse.de>.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */

#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/preempt.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/cpumask.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>

#include <asm/reg.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/lppaca.h>
#include <asm/processor.h>

/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
/* #define EXIT_DEBUG_INT */

void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	local_paca->kvm_hstate.kvm_vcpu = vcpu;
}

void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
}

void kvmppc_vcpu_block(struct kvm_vcpu *vcpu)
{
	u64 now;
	unsigned long dec_nsec;

	now = get_tb();
	if (now >= vcpu->arch.dec_expires && !kvmppc_core_pending_dec(vcpu))
		kvmppc_core_queue_dec(vcpu);
	if (vcpu->arch.pending_exceptions)
		return;
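	/*
	 * A dec_expires value of all-ones means no decrementer expiry is
	 * scheduled.  Otherwise convert the remaining timebase ticks to
	 * nanoseconds and arm an hrtimer so the vcpu is woken when its
	 * decrementer would fire.
	 */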
	if (vcpu->arch.dec_expires != ~(u64)0) {
		dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC /
			tb_ticks_per_sec;
		hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
			      HRTIMER_MODE_REL);
	}

	kvm_vcpu_block(vcpu);
	vcpu->stat.halt_wakeup++;

	if (vcpu->arch.dec_expires != ~(u64)0)
		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
}

void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
{
	vcpu->arch.shregs.msr = msr;
}

void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
{
	vcpu->arch.pvr = pvr;
}

void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
{
	int r;

	pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
	pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
	       vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap);
	for (r = 0; r < 16; ++r)
		pr_err("r%2d = %.16lx r%d = %.16lx\n",
		       r, kvmppc_get_gpr(vcpu, r),
		       r+16, kvmppc_get_gpr(vcpu, r+16));
	pr_err("ctr = %.16lx lr = %.16lx\n",
	       vcpu->arch.ctr, vcpu->arch.lr);
	pr_err("srr0 = %.16llx srr1 = %.16llx\n",
	       vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
	pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
	       vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
	pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
	       vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
	pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n",
	       vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
	pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
	pr_err("fault dar = %.16lx dsisr = %.8x\n",
	       vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
	pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
	for (r = 0; r < vcpu->arch.slb_max; ++r)
		pr_err("  ESID = %.16llx VSID = %.16llx\n",
		       vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
	pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
	       vcpu->arch.lpcr, vcpu->kvm->arch.sdr1,
	       vcpu->arch.last_inst);
}

struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
{
	int r;
	struct kvm_vcpu *v, *ret = NULL;

	mutex_lock(&kvm->lock);
	kvm_for_each_vcpu(r, v, kvm) {
		if (v->vcpu_id == id) {
			ret = v;
			break;
		}
	}
	mutex_unlock(&kvm->lock);
	return ret;
}

static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
{
	vpa->shared_proc = 1;
	vpa->yield_count = 1;
}

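/*
 * Handle the H_REGISTER_VPA hcall for a target vcpu.  The subfunction
 * code in the flags argument selects the operation: 1, 2 and 3
 * register the VPA, dispatch trace log or SLB shadow buffer at the
 * given logical address, while 5, 6 and 7 unregister them.  Codes 0
 * and 4 are rejected as invalid.
 */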
static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
				       unsigned long flags,
				       unsigned long vcpuid, unsigned long vpa)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long pg_index, ra, len;
	unsigned long pg_offset;
	void *va;
	struct kvm_vcpu *tvcpu;

	tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
	if (!tvcpu)
		return H_PARAMETER;

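	/*
	 * The subfunction code is carried in bits 16:18 of the flags
	 * argument (IBM MSB-0 numbering), i.e. bits 45:47 counting from
	 * the least significant bit; extract it into the low three bits.
	 */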
	flags >>= 63 - 18;
	flags &= 7;
	if (flags == 0 || flags == 4)
		return H_PARAMETER;
	if (flags < 4) {
		if (vpa & 0x7f)
			return H_PARAMETER;
		/* registering new area; convert logical addr to real */
		pg_index = vpa >> kvm->arch.ram_porder;
		pg_offset = vpa & (kvm->arch.ram_psize - 1);
		if (pg_index >= kvm->arch.ram_npages)
			return H_PARAMETER;
		if (kvm->arch.ram_pginfo[pg_index].pfn == 0)
			return H_PARAMETER;
		ra = kvm->arch.ram_pginfo[pg_index].pfn << PAGE_SHIFT;
		ra |= pg_offset;
		va = __va(ra);
		if (flags <= 1)
			len = *(unsigned short *)(va + 4);
		else
			len = *(unsigned int *)(va + 4);
		if (pg_offset + len > kvm->arch.ram_psize)
			return H_PARAMETER;
		switch (flags) {
		case 1:		/* register VPA */
			if (len < 640)
				return H_PARAMETER;
			tvcpu->arch.vpa = va;
			init_vpa(vcpu, va);
			break;
		case 2:		/* register DTL */
			if (len < 48)
				return H_PARAMETER;
			if (!tvcpu->arch.vpa)
				return H_RESOURCE;
			len -= len % 48;
			tvcpu->arch.dtl = va;
			tvcpu->arch.dtl_end = va + len;
			break;
		case 3:		/* register SLB shadow buffer */
			if (len < 8)
				return H_PARAMETER;
			if (!tvcpu->arch.vpa)
				return H_RESOURCE;
			len = (len - 16) / 16;
			tvcpu->arch.slb_shadow = va;
			break;
		}
	} else {
		switch (flags) {
		case 5:		/* unregister VPA */
			if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl)
				return H_RESOURCE;
			tvcpu->arch.vpa = NULL;
			break;
		case 6:		/* unregister DTL */
			tvcpu->arch.dtl = NULL;
			break;
		case 7:		/* unregister SLB shadow buffer */
			tvcpu->arch.slb_shadow = NULL;
			break;
		}
	}
	return H_SUCCESS;
}

int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
	unsigned long req = kvmppc_get_gpr(vcpu, 3);
	unsigned long target, ret = H_SUCCESS;
	struct kvm_vcpu *tvcpu;

	switch (req) {
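	/*
	 * H_CEDE: give up the CPU until an interrupt is pending or another
	 * vcpu prods us with H_PROD.  The ceded/prodded flags and the
	 * memory barriers pair with those in the H_PROD path so that a
	 * prod racing with a cede is never lost: either the ceding vcpu
	 * sees prodded already set, or the prodding vcpu sees ceded set
	 * and issues a wakeup.
	 */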
	case H_CEDE:
		vcpu->arch.shregs.msr |= MSR_EE;
		vcpu->arch.ceded = 1;
		smp_mb();
		if (!vcpu->arch.prodded)
			kvmppc_vcpu_block(vcpu);
		else
			vcpu->arch.prodded = 0;
		smp_mb();
		vcpu->arch.ceded = 0;
		break;
	case H_PROD:
		target = kvmppc_get_gpr(vcpu, 4);
		tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
		if (!tvcpu) {
			ret = H_PARAMETER;
			break;
		}
		tvcpu->arch.prodded = 1;
		smp_mb();
		if (tvcpu->arch.ceded) {
			if (waitqueue_active(&tvcpu->wq)) {
				wake_up_interruptible(&tvcpu->wq);
				tvcpu->stat.halt_wakeup++;
			}
		}
		break;
	case H_CONFER:
		break;
	case H_REGISTER_VPA:
		ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
					kvmppc_get_gpr(vcpu, 5),
					kvmppc_get_gpr(vcpu, 6));
		break;
	default:
		return RESUME_HOST;
	}
	kvmppc_set_gpr(vcpu, 3, ret);
	vcpu->arch.hcall_needed = 0;
	return RESUME_GUEST;
}

static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
			      struct task_struct *tsk)
{
	int r = RESUME_HOST;

	vcpu->stat.sum_exits++;

	run->exit_reason = KVM_EXIT_UNKNOWN;
	run->ready_for_interrupt_injection = 1;
	switch (vcpu->arch.trap) {
	/* We're good on these - the host merely wanted to get our attention */
	case BOOK3S_INTERRUPT_HV_DECREMENTER:
		vcpu->stat.dec_exits++;
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_EXTERNAL:
		vcpu->stat.ext_intr_exits++;
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_PERFMON:
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_PROGRAM:
	{
		ulong flags;
		/*
		 * Normally program interrupts are delivered directly
		 * to the guest by the hardware, but we can get here
		 * as a result of a hypervisor emulation interrupt
		 * (e40) getting turned into a 700 by BML RTAS.
		 */
		flags = vcpu->arch.shregs.msr & 0x1f0000ull;
		kvmppc_core_queue_program(vcpu, flags);
		r = RESUME_GUEST;
		break;
	}
	case BOOK3S_INTERRUPT_SYSCALL:
	{
		/* hcall - punt to userspace */
		int i;

		if (vcpu->arch.shregs.msr & MSR_PR) {
			/* sc 1 from userspace - reflect to guest syscall */
			kvmppc_book3s_queue_irqprio(vcpu,
						    BOOK3S_INTERRUPT_SYSCALL);
			r = RESUME_GUEST;
			break;
		}
		run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
		for (i = 0; i < 9; ++i)
			run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
		run->exit_reason = KVM_EXIT_PAPR_HCALL;
		vcpu->arch.hcall_needed = 1;
		r = RESUME_HOST;
		break;
	}
	/*
	 * We get these next two if the guest does a bad real-mode access,
	 * as we have enabled VRMA (virtualized real mode area) mode in the
	 * LPCR.  We just generate an appropriate DSI/ISI to the guest.
	 */
	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
		vcpu->arch.shregs.dsisr = vcpu->arch.fault_dsisr;
		vcpu->arch.shregs.dar = vcpu->arch.fault_dar;
		kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE, 0);
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_H_INST_STORAGE:
		kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE,
					0x08000000);
		r = RESUME_GUEST;
		break;
	/*
	 * This occurs if the guest executes an illegal instruction.
	 * We just generate a program interrupt to the guest, since
	 * we don't emulate any guest instructions at this stage.
	 */
	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
		kvmppc_core_queue_program(vcpu, 0x80000);
		r = RESUME_GUEST;
		break;
	default:
		kvmppc_dump_regs(vcpu);
		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
		       vcpu->arch.trap, kvmppc_get_pc(vcpu),
		       vcpu->arch.shregs.msr);
		r = RESUME_HOST;
		BUG();
		break;
	}

	if (!(r & RESUME_HOST)) {
		/* To avoid clobbering exit_reason, only check for signals if
		 * we aren't already exiting to userspace for some other
		 * reason. */
		if (signal_pending(tsk)) {
			vcpu->stat.signal_exits++;
			run->exit_reason = KVM_EXIT_INTR;
			r = -EINTR;
		} else {
			kvmppc_core_deliver_interrupts(vcpu);
		}
	}

	return r;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	int i;

	/* Clear everything first so the pvr assignment isn't wiped out. */
	memset(sregs, 0, sizeof(struct kvm_sregs));
	sregs->pvr = vcpu->arch.pvr;
	for (i = 0; i < vcpu->arch.slb_max; i++) {
		sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
		sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
	}

	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	int i, j;

	kvmppc_set_pvr(vcpu, sregs->pvr);

	j = 0;
	for (i = 0; i < vcpu->arch.slb_nr; i++) {
		if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) {
			vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe;
			vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv;
			++j;
		}
	}
	vcpu->arch.slb_max = j;

	return 0;
}

int kvmppc_core_check_processor_compat(void)
{
	if (cpu_has_feature(CPU_FTR_HVMODE_206))
		return 0;
	return -EIO;
}

struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
	struct kvm_vcpu *vcpu;
	int err = -ENOMEM;
	unsigned long lpcr;

	vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
	if (!vcpu)
		goto out;

	err = kvm_vcpu_init(vcpu, kvm, id);
	if (err)
		goto free_vcpu;

	vcpu->arch.shared = &vcpu->arch.shregs;
	vcpu->arch.last_cpu = -1;
	vcpu->arch.mmcr[0] = MMCR0_FC;
	vcpu->arch.ctrl = CTRL_RUNLATCH;
	/* default to host PVR, since we can't spoof it */
	vcpu->arch.pvr = mfspr(SPRN_PVR);
	kvmppc_set_pvr(vcpu, vcpu->arch.pvr);

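	/*
	 * Inherit the wakeup and LPES interrupt-routing bits from the
	 * host LPCR, then enable virtualized partition memory (VPM0),
	 * a large-page VRMA, a default prefetch depth of 4 and the
	 * hypervisor decrementer (HDICE).
	 */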
	lpcr = kvm->arch.host_lpcr & (LPCR_PECE | LPCR_LPES);
	lpcr |= LPCR_VPM0 | LPCR_VRMA_L | (4UL << LPCR_DPFD_SH) | LPCR_HDICE;
	vcpu->arch.lpcr = lpcr;

	kvmppc_mmu_book3s_hv_init(vcpu);

	return vcpu;

free_vcpu:
	kfree(vcpu);
out:
	return ERR_PTR(err);
}

void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
{
	kvm_vcpu_uninit(vcpu);
	kfree(vcpu);
}

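/* Low-level guest entry/exit path, implemented in assembly. */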
extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);

static int kvmppc_run_vcpu(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
	u64 now;

	if (signal_pending(current)) {
		run->exit_reason = KVM_EXIT_INTR;
		return -EINTR;
	}

	flush_fp_to_thread(current);
	flush_altivec_to_thread(current);
	flush_vsx_to_thread(current);
	preempt_disable();

	/*
	 * Make sure we are running on thread 0, and that
	 * secondary threads are offline.
	 * XXX we should also block attempts to bring any
	 * secondary threads online.
	 */
	if (threads_per_core > 1) {
		int cpu = smp_processor_id();
		int thr = cpu_thread_in_core(cpu);

		if (thr)
			goto out;
		while (++thr < threads_per_core)
			if (cpu_online(cpu + thr))
				goto out;
	}

	kvm_guest_enter();

	__kvmppc_vcore_entry(NULL, vcpu);

	kvm_guest_exit();

	preempt_enable();
	kvm_resched(vcpu);

	now = get_tb();
	/* cancel pending dec exception if dec is positive */
	if (now < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu))
		kvmppc_core_dequeue_dec(vcpu);

	return kvmppc_handle_exit(run, vcpu, current);

 out:
	preempt_enable();
	return -EBUSY;
}

int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
	int r;

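	/*
	 * Run the vcpu until an exit that needs userspace attention.
	 * hcalls made by the privileged guest (MSR_PR clear) are first
	 * tried in the kernel; kvmppc_pseries_do_hcall() returns
	 * RESUME_HOST for any it cannot handle, dropping out to
	 * userspace.
	 */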
	do {
		r = kvmppc_run_vcpu(run, vcpu);

		if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
		    !(vcpu->arch.shregs.msr & MSR_PR)) {
			r = kvmppc_pseries_do_hcall(vcpu);
			kvmppc_core_deliver_interrupts(vcpu);
		}
	} while (r == RESUME_GUEST);
	return r;
}

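/*
 * A memslot whose guest physical address is zero backs the guest's
 * real-mode area, so it needs the extra VRMA setup below.
 */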
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
				      struct kvm_userspace_memory_region *mem)
{
	if (mem->guest_phys_addr == 0 && mem->memory_size != 0)
		return kvmppc_prepare_vrma(kvm, mem);
	return 0;
}

void kvmppc_core_commit_memory_region(struct kvm *kvm,
				      struct kvm_userspace_memory_region *mem)
{
	if (mem->guest_phys_addr == 0 && mem->memory_size != 0)
		kvmppc_map_vrma(kvm, mem);
}

int kvmppc_core_init_vm(struct kvm *kvm)
{
	long r;

	/* Allocate hashed page table */
	r = kvmppc_alloc_hpt(kvm);

	return r;
}

void kvmppc_core_destroy_vm(struct kvm *kvm)
{
	kvmppc_free_hpt(kvm);
}

/* These are stubs for now */
void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
{
}

/* We don't need to emulate any privileged instructions or dcbz */
int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
			   unsigned int inst, int *advance)
{
	return EMULATE_FAIL;
}

int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
{
	return EMULATE_FAIL;
}

int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
{
	return EMULATE_FAIL;
}

static int kvmppc_book3s_hv_init(void)
{
	int r;

	r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);

	if (r)
		return r;

	r = kvmppc_mmu_hv_init();

	return r;
}

static void kvmppc_book3s_hv_exit(void)
{
	kvm_exit();
}

module_init(kvmppc_book3s_hv_init);
module_exit(kvmppc_book3s_hv_exit);