/*
 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
 *
 * Authors:
 *    Paul Mackerras <paulus@au1.ibm.com>
 *    Alexander Graf <agraf@suse.de>
 *    Kevin Wolf <mail@kevin-wolf.de>
 *
 * Description: KVM functions specific to running on Book 3S
 * processors in hypervisor mode (specifically POWER7 and later).
 *
 * This file is derived from arch/powerpc/kvm/book3s.c,
 * by Alexander Graf <agraf@suse.de>.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */

#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/preempt.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>

#include <asm/reg.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/lppaca.h>
#include <asm/processor.h>
#include <asm/cputhreads.h>
#include <asm/page.h>
#include <asm/hvcall.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>

#define LARGE_PAGE_ORDER	24	/* 16MB pages */

/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
/* #define EXIT_DEBUG_INT */

static void kvmppc_end_cede(struct kvm_vcpu *vcpu);

void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	local_paca->kvm_hstate.kvm_vcpu = vcpu;
	local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore;
}

void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
}

void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
{
	vcpu->arch.shregs.msr = msr;
	kvmppc_end_cede(vcpu);
}

void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
{
	vcpu->arch.pvr = pvr;
}

void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
{
	int r;

	pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
	pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
	       vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap);
	for (r = 0; r < 16; ++r)
		pr_err("r%2d = %.16lx r%d = %.16lx\n",
		       r, kvmppc_get_gpr(vcpu, r),
		       r+16, kvmppc_get_gpr(vcpu, r+16));
	pr_err("ctr = %.16lx lr = %.16lx\n",
	       vcpu->arch.ctr, vcpu->arch.lr);
	pr_err("srr0 = %.16llx srr1 = %.16llx\n",
	       vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
	pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
	       vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
	pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
	       vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
	pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n",
	       vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
	pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
	pr_err("fault dar = %.16lx dsisr = %.8x\n",
	       vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
	pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
	for (r = 0; r < vcpu->arch.slb_max; ++r)
		pr_err(" ESID = %.16llx VSID = %.16llx\n",
		       vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
	pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
	       vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1,
	       vcpu->arch.last_inst);
}

struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
{
	int r;
	struct kvm_vcpu *v, *ret = NULL;

	mutex_lock(&kvm->lock);
	kvm_for_each_vcpu(r, v, kvm) {
		if (v->vcpu_id == id) {
			ret = v;
			break;
		}
	}
	mutex_unlock(&kvm->lock);
	return ret;
}

static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
{
	vpa->shared_proc = 1;
	vpa->yield_count = 1;
}

static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
				       unsigned long flags,
				       unsigned long vcpuid, unsigned long vpa)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long len, nb;
	void *va;
	struct kvm_vcpu *tvcpu;
	int err = H_PARAMETER;

	tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
	if (!tvcpu)
		return H_PARAMETER;

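	/*
	 * Extract the 3-bit subfunction field from the flags argument
	 * (bits 16-18 counting from the most-significant bit, hence the
	 * shift by 63 - 18).  Values 1-3 register the VPA, DTL and SLB
	 * shadow buffer respectively, 5-7 unregister them, and 0 and 4
	 * are invalid.
	 */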
	flags >>= 63 - 18;
	flags &= 7;
	if (flags == 0 || flags == 4)
		return H_PARAMETER;
	if (flags < 4) {
		if (vpa & 0x7f)
			return H_PARAMETER;
		if (flags >= 2 && !tvcpu->arch.vpa)
			return H_RESOURCE;
		/* registering new area; convert logical addr to real */
		va = kvmppc_pin_guest_page(kvm, vpa, &nb);
		if (va == NULL)
			return H_PARAMETER;
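		/*
		 * The length of the area is read from the area itself:
		 * a 16-bit field at offset 4 for the VPA, a 32-bit field
		 * at offset 4 for the DTL and the SLB shadow buffer.
		 */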
		if (flags <= 1)
			len = *(unsigned short *)(va + 4);
		else
			len = *(unsigned int *)(va + 4);
		if (len > nb)
			goto out_unpin;
		switch (flags) {
		case 1:		/* register VPA */
			if (len < 640)
				goto out_unpin;
			if (tvcpu->arch.vpa)
				kvmppc_unpin_guest_page(kvm, vcpu->arch.vpa);
			tvcpu->arch.vpa = va;
			init_vpa(vcpu, va);
			break;
		case 2:		/* register DTL */
			if (len < 48)
				goto out_unpin;
			len -= len % 48;
			if (tvcpu->arch.dtl)
				kvmppc_unpin_guest_page(kvm, vcpu->arch.dtl);
			tvcpu->arch.dtl = va;
			tvcpu->arch.dtl_end = va + len;
			break;
		case 3:		/* register SLB shadow buffer */
			if (len < 16)
				goto out_unpin;
			if (tvcpu->arch.slb_shadow)
				kvmppc_unpin_guest_page(kvm, vcpu->arch.slb_shadow);
			tvcpu->arch.slb_shadow = va;
			break;
		}
	} else {
		switch (flags) {
		case 5:		/* unregister VPA */
			if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl)
				return H_RESOURCE;
			if (!tvcpu->arch.vpa)
				break;
			kvmppc_unpin_guest_page(kvm, tvcpu->arch.vpa);
			tvcpu->arch.vpa = NULL;
			break;
		case 6:		/* unregister DTL */
			if (!tvcpu->arch.dtl)
				break;
			kvmppc_unpin_guest_page(kvm, tvcpu->arch.dtl);
			tvcpu->arch.dtl = NULL;
			break;
		case 7:		/* unregister SLB shadow buffer */
			if (!tvcpu->arch.slb_shadow)
				break;
			kvmppc_unpin_guest_page(kvm, tvcpu->arch.slb_shadow);
			tvcpu->arch.slb_shadow = NULL;
			break;
		}
	}
	return H_SUCCESS;

 out_unpin:
	kvmppc_unpin_guest_page(kvm, va);
	return err;
}

int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
	unsigned long req = kvmppc_get_gpr(vcpu, 3);
	unsigned long target, ret = H_SUCCESS;
	struct kvm_vcpu *tvcpu;

	switch (req) {
	case H_CEDE:
		break;
	case H_PROD:
		target = kvmppc_get_gpr(vcpu, 4);
		tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
		if (!tvcpu) {
			ret = H_PARAMETER;
			break;
		}
		tvcpu->arch.prodded = 1;
		smp_mb();
		if (vcpu->arch.ceded) {
			if (waitqueue_active(&vcpu->wq)) {
				wake_up_interruptible(&vcpu->wq);
				vcpu->stat.halt_wakeup++;
			}
		}
		break;
	case H_CONFER:
		break;
	case H_REGISTER_VPA:
		ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
					kvmppc_get_gpr(vcpu, 5),
					kvmppc_get_gpr(vcpu, 6));
		break;
	default:
		return RESUME_HOST;
	}
	kvmppc_set_gpr(vcpu, 3, ret);
	vcpu->arch.hcall_needed = 0;
	return RESUME_GUEST;
}

static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
			      struct task_struct *tsk)
{
	int r = RESUME_HOST;

	vcpu->stat.sum_exits++;

	run->exit_reason = KVM_EXIT_UNKNOWN;
	run->ready_for_interrupt_injection = 1;
	switch (vcpu->arch.trap) {
	/* We're good on these - the host merely wanted to get our attention */
	case BOOK3S_INTERRUPT_HV_DECREMENTER:
		vcpu->stat.dec_exits++;
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_EXTERNAL:
		vcpu->stat.ext_intr_exits++;
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_PERFMON:
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_PROGRAM:
	{
		ulong flags;
		/*
		 * Normally program interrupts are delivered directly
		 * to the guest by the hardware, but we can get here
		 * as a result of a hypervisor emulation interrupt
		 * (e40) getting turned into a 700 by BML RTAS.
		 */
		flags = vcpu->arch.shregs.msr & 0x1f0000ull;
		kvmppc_core_queue_program(vcpu, flags);
		r = RESUME_GUEST;
		break;
	}
	case BOOK3S_INTERRUPT_SYSCALL:
	{
		/* hcall - punt to userspace */
		int i;

		if (vcpu->arch.shregs.msr & MSR_PR) {
			/* sc 1 from userspace - reflect to guest syscall */
			kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_SYSCALL);
			r = RESUME_GUEST;
			break;
		}
		run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
		for (i = 0; i < 9; ++i)
			run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
		run->exit_reason = KVM_EXIT_PAPR_HCALL;
		vcpu->arch.hcall_needed = 1;
		r = RESUME_HOST;
		break;
	}
	/*
	 * We get these next two if the guest does a bad real-mode access,
	 * as we have enabled VRMA (virtualized real mode area) mode in the
	 * LPCR. We just generate an appropriate DSI/ISI to the guest.
	 */
	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
		vcpu->arch.shregs.dsisr = vcpu->arch.fault_dsisr;
		vcpu->arch.shregs.dar = vcpu->arch.fault_dar;
		kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE, 0);
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_H_INST_STORAGE:
		kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE,
					0x08000000);
		r = RESUME_GUEST;
		break;
	/*
	 * This occurs if the guest executes an illegal instruction.
	 * We just generate a program interrupt to the guest, since
	 * we don't emulate any guest instructions at this stage.
	 */
	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
		kvmppc_core_queue_program(vcpu, 0x80000);
		r = RESUME_GUEST;
		break;
	default:
		kvmppc_dump_regs(vcpu);
		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
			vcpu->arch.trap, kvmppc_get_pc(vcpu),
			vcpu->arch.shregs.msr);
		r = RESUME_HOST;
		BUG();
		break;
	}

	return r;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	int i;

	memset(sregs, 0, sizeof(struct kvm_sregs));
	sregs->pvr = vcpu->arch.pvr;
	for (i = 0; i < vcpu->arch.slb_max; i++) {
		sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
		sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
	}

	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	int i, j;

	kvmppc_set_pvr(vcpu, sregs->pvr);

	j = 0;
	for (i = 0; i < vcpu->arch.slb_nr; i++) {
		if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) {
			vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe;
			vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv;
			++j;
		}
	}
	vcpu->arch.slb_max = j;

	return 0;
}

int kvmppc_core_check_processor_compat(void)
{
	if (cpu_has_feature(CPU_FTR_HVMODE))
		return 0;
	return -EIO;
}

struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
	struct kvm_vcpu *vcpu;
	int err = -EINVAL;
	int core;
	struct kvmppc_vcore *vcore;

	core = id / threads_per_core;
	if (core >= KVM_MAX_VCORES)
		goto out;

	err = -ENOMEM;
	vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
	if (!vcpu)
		goto out;

	err = kvm_vcpu_init(vcpu, kvm, id);
	if (err)
		goto free_vcpu;

	vcpu->arch.shared = &vcpu->arch.shregs;
	vcpu->arch.last_cpu = -1;
	vcpu->arch.mmcr[0] = MMCR0_FC;
	vcpu->arch.ctrl = CTRL_RUNLATCH;
	/* default to host PVR, since we can't spoof it */
	vcpu->arch.pvr = mfspr(SPRN_PVR);
	kvmppc_set_pvr(vcpu, vcpu->arch.pvr);

	kvmppc_mmu_book3s_hv_init(vcpu);

	/*
	 * We consider the vcpu stopped until we see the first run ioctl for it.
	 */
	vcpu->arch.state = KVMPPC_VCPU_STOPPED;

	init_waitqueue_head(&vcpu->arch.cpu_run);

	mutex_lock(&kvm->lock);
	vcore = kvm->arch.vcores[core];
	if (!vcore) {
		vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
		if (vcore) {
			INIT_LIST_HEAD(&vcore->runnable_threads);
			spin_lock_init(&vcore->lock);
			init_waitqueue_head(&vcore->wq);
		}
		kvm->arch.vcores[core] = vcore;
	}
	mutex_unlock(&kvm->lock);

	if (!vcore)
		goto free_vcpu;

	spin_lock(&vcore->lock);
	++vcore->num_threads;
	spin_unlock(&vcore->lock);
	vcpu->arch.vcore = vcore;

	vcpu->arch.cpu_type = KVM_CPU_3S_64;
	kvmppc_sanity_check(vcpu);

	return vcpu;

free_vcpu:
	kfree(vcpu);
out:
	return ERR_PTR(err);
}

void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.dtl)
		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl);
	if (vcpu->arch.slb_shadow)
		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow);
	if (vcpu->arch.vpa)
		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa);
	kvm_vcpu_uninit(vcpu);
	kfree(vcpu);
}

static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
{
	unsigned long dec_nsec, now;

	now = get_tb();
	if (now > vcpu->arch.dec_expires) {
		/* decrementer has already gone negative */
		kvmppc_core_queue_dec(vcpu);
		kvmppc_core_prepare_to_enter(vcpu);
		return;
	}
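	/*
	 * dec_expires and now are in timebase ticks; scale by
	 * NSEC_PER_SEC / tb_ticks_per_sec to get nanoseconds for the
	 * hrtimer.
	 */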
	dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
		   / tb_ticks_per_sec;
	hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
		      HRTIMER_MODE_REL);
	vcpu->arch.timer_running = 1;
}

static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
{
	vcpu->arch.ceded = 0;
	if (vcpu->arch.timer_running) {
		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
		vcpu->arch.timer_running = 0;
	}
}

extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
extern void xics_wake_cpu(int cpu);

static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
				   struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu *v;

	if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
		return;
	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
	--vc->n_runnable;
	++vc->n_busy;
	/* decrement the physical thread id of each following vcpu */
	v = vcpu;
	list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list)
		--v->arch.ptid;
	list_del(&vcpu->arch.run_list);
}

static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
{
	int cpu;
	struct paca_struct *tpaca;
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	if (vcpu->arch.timer_running) {
		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
		vcpu->arch.timer_running = 0;
	}
	cpu = vc->pcpu + vcpu->arch.ptid;
	tpaca = &paca[cpu];
	tpaca->kvm_hstate.kvm_vcpu = vcpu;
	tpaca->kvm_hstate.kvm_vcore = vc;
	tpaca->kvm_hstate.napping = 0;
	vcpu->cpu = vc->pcpu;
	smp_wmb();
#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
	if (vcpu->arch.ptid) {
		tpaca->cpu_start = 0x80;
		wmb();
		xics_wake_cpu(cpu);
		++vc->n_woken;
	}
#endif
}

static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
{
	int i;

	HMT_low();
	i = 0;
	while (vc->nap_count < vc->n_woken) {
		if (++i >= 1000000) {
			pr_err("kvmppc_wait_for_nap timeout %d %d\n",
			       vc->nap_count, vc->n_woken);
			break;
		}
		cpu_relax();
	}
	HMT_medium();
}

/*
 * Check that we are on thread 0 and that any other threads in
 * this core are off-line.
 */
static int on_primary_thread(void)
{
	int cpu = smp_processor_id();
	int thr = cpu_thread_in_core(cpu);

	if (thr)
		return 0;
	while (++thr < threads_per_core)
		if (cpu_online(cpu + thr))
			return 0;
	return 1;
}

/*
 * Run a set of guest threads on a physical core.
 * Called with vc->lock held.
 */
static int kvmppc_run_core(struct kvmppc_vcore *vc)
{
	struct kvm_vcpu *vcpu, *vcpu0, *vnext;
	long ret;
	u64 now;
	int ptid;

	/* don't start if any threads have a signal pending */
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
		if (signal_pending(vcpu->arch.run_task))
			return 0;

	/*
	 * Make sure we are running on thread 0, and that
	 * secondary threads are offline.
	 * XXX we should also block attempts to bring any
	 * secondary threads online.
	 */
	if (threads_per_core > 1 && !on_primary_thread()) {
		list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
			vcpu->arch.ret = -EBUSY;
		goto out;
	}

	/*
	 * Assign physical thread IDs, first to non-ceded vcpus
	 * and then to ceded ones.
	 */
	ptid = 0;
	vcpu0 = NULL;
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
		if (!vcpu->arch.ceded) {
			if (!ptid)
				vcpu0 = vcpu;
			vcpu->arch.ptid = ptid++;
		}
	}
	if (!vcpu0)
		return 0;		/* nothing to run */
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
		if (vcpu->arch.ceded)
			vcpu->arch.ptid = ptid++;

	vc->n_woken = 0;
	vc->nap_count = 0;
	vc->entry_exit_count = 0;
	vc->vcore_state = VCORE_RUNNING;
	vc->in_guest = 0;
	vc->pcpu = smp_processor_id();
	vc->napping_threads = 0;
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
		kvmppc_start_thread(vcpu);

	preempt_disable();
	spin_unlock(&vc->lock);

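	/*
	 * Enter the guest on this (primary) thread; any secondary
	 * hardware threads were already kicked via kvmppc_start_thread()
	 * above.
	 */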
	kvm_guest_enter();
	__kvmppc_vcore_entry(NULL, vcpu0);

	spin_lock(&vc->lock);
	/* disable sending of IPIs on virtual external irqs */
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
		vcpu->cpu = -1;
	/* wait for secondary threads to finish writing their state to memory */
	if (vc->nap_count < vc->n_woken)
		kvmppc_wait_for_nap(vc);
	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
	vc->vcore_state = VCORE_EXITING;
	spin_unlock(&vc->lock);

	/* make sure updates to secondary vcpu structs are visible now */
	smp_mb();
	kvm_guest_exit();

	preempt_enable();
	kvm_resched(vcpu);

	now = get_tb();
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
		/* cancel pending dec exception if dec is positive */
		if (now < vcpu->arch.dec_expires &&
		    kvmppc_core_pending_dec(vcpu))
			kvmppc_core_dequeue_dec(vcpu);

		ret = RESUME_GUEST;
		if (vcpu->arch.trap)
			ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
						 vcpu->arch.run_task);

		vcpu->arch.ret = ret;
		vcpu->arch.trap = 0;

		if (vcpu->arch.ceded) {
			if (ret != RESUME_GUEST)
				kvmppc_end_cede(vcpu);
			else
				kvmppc_set_timer(vcpu);
		}
	}

	spin_lock(&vc->lock);
 out:
	vc->vcore_state = VCORE_INACTIVE;
	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
				 arch.run_list) {
		if (vcpu->arch.ret != RESUME_GUEST) {
			kvmppc_remove_runnable(vc, vcpu);
			wake_up(&vcpu->arch.cpu_run);
		}
	}

	return 1;
}

/*
 * Wait for some other vcpu thread to execute us, and
 * wake us up when we need to handle something in the host.
 */
static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
		schedule();
	finish_wait(&vcpu->arch.cpu_run, &wait);
}

/*
 * All the vcpus in this vcore are idle, so wait for a decrementer
 * or external interrupt to one of the vcpus. vc->lock is held.
 */
static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
{
	DEFINE_WAIT(wait);
	struct kvm_vcpu *v;
	int all_idle = 1;

	prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
	vc->vcore_state = VCORE_SLEEPING;
	spin_unlock(&vc->lock);
	list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
		if (!v->arch.ceded || v->arch.pending_exceptions) {
			all_idle = 0;
			break;
		}
	}
	if (all_idle)
		schedule();
	finish_wait(&vc->wq, &wait);
	spin_lock(&vc->lock);
	vc->vcore_state = VCORE_INACTIVE;
}

static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
	int n_ceded;
	int prev_state;
	struct kvmppc_vcore *vc;
	struct kvm_vcpu *v, *vn;

	kvm_run->exit_reason = 0;
	vcpu->arch.ret = RESUME_GUEST;
	vcpu->arch.trap = 0;

	/*
	 * Synchronize with other threads in this virtual core
	 */
	vc = vcpu->arch.vcore;
	spin_lock(&vc->lock);
	vcpu->arch.ceded = 0;
	vcpu->arch.run_task = current;
	vcpu->arch.kvm_run = kvm_run;
	prev_state = vcpu->arch.state;
	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
	list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
	++vc->n_runnable;

	/*
	 * This happens the first time this is called for a vcpu.
	 * If the vcore is already running, we may be able to start
	 * this thread straight away and have it join in.
	 */
	if (prev_state == KVMPPC_VCPU_STOPPED) {
		if (vc->vcore_state == VCORE_RUNNING &&
		    VCORE_EXIT_COUNT(vc) == 0) {
			vcpu->arch.ptid = vc->n_runnable - 1;
			kvmppc_start_thread(vcpu);
		}

	} else if (prev_state == KVMPPC_VCPU_BUSY_IN_HOST)
		--vc->n_busy;

	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
	       !signal_pending(current)) {
		if (vc->n_busy || vc->vcore_state != VCORE_INACTIVE) {
			spin_unlock(&vc->lock);
			kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
			spin_lock(&vc->lock);
			continue;
		}
		n_ceded = 0;
		list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
			n_ceded += v->arch.ceded;
		if (n_ceded == vc->n_runnable)
			kvmppc_vcore_blocked(vc);
		else
			kvmppc_run_core(vc);

		list_for_each_entry_safe(v, vn, &vc->runnable_threads,
					 arch.run_list) {
			kvmppc_core_prepare_to_enter(v);
			if (signal_pending(v->arch.run_task)) {
				kvmppc_remove_runnable(vc, v);
				v->stat.signal_exits++;
				v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
				v->arch.ret = -EINTR;
				wake_up(&v->arch.cpu_run);
			}
		}
	}

	if (signal_pending(current)) {
		if (vc->vcore_state == VCORE_RUNNING ||
		    vc->vcore_state == VCORE_EXITING) {
			spin_unlock(&vc->lock);
			kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
			spin_lock(&vc->lock);
		}
		if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
			kvmppc_remove_runnable(vc, vcpu);
			vcpu->stat.signal_exits++;
			kvm_run->exit_reason = KVM_EXIT_INTR;
			vcpu->arch.ret = -EINTR;
		}
	}

	spin_unlock(&vc->lock);
	return vcpu->arch.ret;
}

int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
	int r;

	if (!vcpu->arch.sane) {
		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		return -EINVAL;
	}

	kvmppc_core_prepare_to_enter(vcpu);

	/* No need to go into the guest when all we'll do is come back out */
	if (signal_pending(current)) {
		run->exit_reason = KVM_EXIT_INTR;
		return -EINTR;
	}

	/* On PPC970, check that we have an RMA region */
	if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201))
		return -EPERM;

	flush_fp_to_thread(current);
	flush_altivec_to_thread(current);
	flush_vsx_to_thread(current);
	vcpu->arch.wqp = &vcpu->arch.vcore->wq;

	do {
		r = kvmppc_run_vcpu(run, vcpu);

		if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
		    !(vcpu->arch.shregs.msr & MSR_PR)) {
			r = kvmppc_pseries_do_hcall(vcpu);
			kvmppc_core_prepare_to_enter(vcpu);
		}
	} while (r == RESUME_GUEST);
	return r;
}

static long kvmppc_stt_npages(unsigned long window_size)
{
	return ALIGN((window_size >> SPAPR_TCE_SHIFT)
		     * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
}

static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
{
	struct kvm *kvm = stt->kvm;
	int i;

	mutex_lock(&kvm->lock);
	list_del(&stt->list);
	for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
		__free_page(stt->pages[i]);
	kfree(stt);
	mutex_unlock(&kvm->lock);

	kvm_put_kvm(kvm);
}

static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
	struct page *page;

	if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
		return VM_FAULT_SIGBUS;

	page = stt->pages[vmf->pgoff];
	get_page(page);
	vmf->page = page;
	return 0;
}

static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
	.fault = kvm_spapr_tce_fault,
};

static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &kvm_spapr_tce_vm_ops;
	return 0;
}

static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
{
	struct kvmppc_spapr_tce_table *stt = filp->private_data;

	release_spapr_tce_table(stt);
	return 0;
}

static struct file_operations kvm_spapr_tce_fops = {
	.mmap = kvm_spapr_tce_mmap,
	.release = kvm_spapr_tce_release,
};

long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
				   struct kvm_create_spapr_tce *args)
{
	struct kvmppc_spapr_tce_table *stt = NULL;
	long npages;
	int ret = -ENOMEM;
	int i;

	/* Check this LIOBN hasn't been previously allocated */
	list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
		if (stt->liobn == args->liobn)
			return -EBUSY;
	}

	npages = kvmppc_stt_npages(args->window_size);

	stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
		      GFP_KERNEL);
	if (!stt)
		goto fail;

	stt->liobn = args->liobn;
	stt->window_size = args->window_size;
	stt->kvm = kvm;

	for (i = 0; i < npages; i++) {
		stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!stt->pages[i])
			goto fail;
	}

	kvm_get_kvm(kvm);

	mutex_lock(&kvm->lock);
	list_add(&stt->list, &kvm->arch.spapr_tce_tables);

	mutex_unlock(&kvm->lock);

	return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
				stt, O_RDWR);

fail:
	if (stt) {
		for (i = 0; i < npages; i++)
			if (stt->pages[i])
				__free_page(stt->pages[i]);

		kfree(stt);
	}
	return ret;
}

/* Work out RMLS (real mode limit selector) field value for a given RMA size.
   Assumes POWER7 or PPC970. */
static inline int lpcr_rmls(unsigned long rma_size)
{
	switch (rma_size) {
	case 32ul << 20:	/* 32 MB */
		if (cpu_has_feature(CPU_FTR_ARCH_206))
			return 8;	/* only supported on POWER7 */
		return -1;
	case 64ul << 20:	/* 64 MB */
		return 3;
	case 128ul << 20:	/* 128 MB */
		return 7;
	case 256ul << 20:	/* 256 MB */
		return 4;
	case 1ul << 30:		/* 1 GB */
		return 2;
	case 16ul << 30:	/* 16 GB */
		return 1;
	case 256ul << 30:	/* 256 GB */
		return 0;
	default:
		return -1;
	}
}

static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct kvmppc_rma_info *ri = vma->vm_file->private_data;
	struct page *page;

	if (vmf->pgoff >= ri->npages)
		return VM_FAULT_SIGBUS;

	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
	get_page(page);
	vmf->page = page;
	return 0;
}

static const struct vm_operations_struct kvm_rma_vm_ops = {
	.fault = kvm_rma_fault,
};

static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_flags |= VM_RESERVED;
	vma->vm_ops = &kvm_rma_vm_ops;
	return 0;
}

static int kvm_rma_release(struct inode *inode, struct file *filp)
{
	struct kvmppc_rma_info *ri = filp->private_data;

	kvm_release_rma(ri);
	return 0;
}

static struct file_operations kvm_rma_fops = {
	.mmap = kvm_rma_mmap,
	.release = kvm_rma_release,
};

long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
{
	struct kvmppc_rma_info *ri;
	long fd;

	ri = kvm_alloc_rma();
	if (!ri)
		return -ENOMEM;

	fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR);
	if (fd < 0)
		kvm_release_rma(ri);

	ret->rma_size = ri->npages << PAGE_SHIFT;
	return fd;
}

static struct page *hva_to_page(unsigned long addr)
{
	struct page *page[1];
	int npages;

	might_sleep();

	npages = get_user_pages_fast(addr, 1, 1, page);

	if (unlikely(npages != 1))
		return 0;

	return page[0];
}

int kvmppc_core_prepare_memory_region(struct kvm *kvm,
				struct kvm_userspace_memory_region *mem)
{
	unsigned long psize, porder;
	unsigned long i, npages;
	unsigned long hva;
	struct kvmppc_rma_info *ri = NULL;
	struct page *page;
	unsigned long *phys;

	/* For now, only allow 16MB pages */
	porder = LARGE_PAGE_ORDER;
	psize = 1ul << porder;
	if ((mem->memory_size & (psize - 1)) ||
	    (mem->guest_phys_addr & (psize - 1))) {
		pr_err("bad memory_size=%llx @ %llx\n",
		       mem->memory_size, mem->guest_phys_addr);
		return -EINVAL;
	}

	/* Allocate a slot_phys array */
	npages = mem->memory_size >> porder;
	phys = kvm->arch.slot_phys[mem->slot];
	if (!phys) {
		phys = vzalloc(npages * sizeof(unsigned long));
		if (!phys)
			return -ENOMEM;
		kvm->arch.slot_phys[mem->slot] = phys;
		kvm->arch.slot_npages[mem->slot] = npages;
	}

	/* Do we already have an RMA registered? */
	if (mem->guest_phys_addr == 0 && kvm->arch.rma)
		return -EINVAL;

	/* Is this one of our preallocated RMAs? */
	if (mem->guest_phys_addr == 0) {
		struct vm_area_struct *vma;

		down_read(&current->mm->mmap_sem);
		vma = find_vma(current->mm, mem->userspace_addr);
		if (vma && vma->vm_file &&
		    vma->vm_file->f_op == &kvm_rma_fops &&
		    mem->userspace_addr == vma->vm_start)
			ri = vma->vm_file->private_data;
		up_read(&current->mm->mmap_sem);
		if (!ri && cpu_has_feature(CPU_FTR_ARCH_201)) {
			pr_err("CPU requires an RMO\n");
			return -EINVAL;
		}
	}

	if (ri) {
		unsigned long rma_size;
		unsigned long lpcr;
		long rmls;

		rma_size = ri->npages << PAGE_SHIFT;
		if (rma_size > mem->memory_size)
			rma_size = mem->memory_size;
		rmls = lpcr_rmls(rma_size);
		if (rmls < 0) {
			pr_err("Can't use RMA of 0x%lx bytes\n", rma_size);
			return -EINVAL;
		}
		atomic_inc(&ri->use_count);
		kvm->arch.rma = ri;

		/* Update LPCR and RMOR */
		lpcr = kvm->arch.lpcr;
		if (cpu_has_feature(CPU_FTR_ARCH_201)) {
			/* PPC970; insert RMLS value (split field) in HID4 */
			lpcr &= ~((1ul << HID4_RMLS0_SH) |
				  (3ul << HID4_RMLS2_SH));
			lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) |
				((rmls & 3) << HID4_RMLS2_SH);
			/* RMOR is also in HID4 */
			lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
				<< HID4_RMOR_SH;
		} else {
			/* POWER7 */
			lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
			lpcr |= rmls << LPCR_RMLS_SH;
			kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
		}
		kvm->arch.lpcr = lpcr;
		pr_info("Using RMO at %lx size %lx (LPCR = %lx)\n",
			ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
	}

	for (i = 0; i < npages; ++i) {
		if (ri && i < ri->npages) {
			phys[i] = (ri->base_pfn << PAGE_SHIFT) + (i << porder);
			continue;
		}
		hva = mem->userspace_addr + (i << porder);
		page = hva_to_page(hva);
		if (!page) {
			pr_err("oops, no pfn for hva %lx\n", hva);
			goto err;
		}
		/* Check it's a 16MB page */
		if (!PageHead(page) ||
		    compound_order(page) != (LARGE_PAGE_ORDER - PAGE_SHIFT)) {
			pr_err("page at %lx isn't 16MB (o=%d)\n",
			       hva, compound_order(page));
			goto err;
		}
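		/*
		 * Record the physical address and note (KVMPPC_GOT_PAGE)
		 * that we hold a reference on this page, so unpin_slot()
		 * knows to put_page() it later.
		 */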
		phys[i] = (page_to_pfn(page) << PAGE_SHIFT) | KVMPPC_GOT_PAGE;
	}

	return 0;

 err:
	return -EINVAL;
}

static void unpin_slot(struct kvm *kvm, int slot_id)
{
	unsigned long *physp;
	unsigned long j, npages, pfn;
	struct page *page;

	physp = kvm->arch.slot_phys[slot_id];
	npages = kvm->arch.slot_npages[slot_id];
	if (physp) {
		for (j = 0; j < npages; j++) {
			if (!(physp[j] & KVMPPC_GOT_PAGE))
				continue;
			pfn = physp[j] >> PAGE_SHIFT;
			page = pfn_to_page(pfn);
			SetPageDirty(page);
			put_page(page);
		}
		vfree(physp);
		kvm->arch.slot_phys[slot_id] = NULL;
	}
}

void kvmppc_core_commit_memory_region(struct kvm *kvm,
				struct kvm_userspace_memory_region *mem)
{
	if (mem->guest_phys_addr == 0 && mem->memory_size != 0 &&
	    !kvm->arch.rma)
		kvmppc_map_vrma(kvm, mem);
}

int kvmppc_core_init_vm(struct kvm *kvm)
{
	long r;
	unsigned long lpcr;

	/* Allocate hashed page table */
	r = kvmppc_alloc_hpt(kvm);
	if (r)
		return r;

	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);

	kvm->arch.ram_psize = 1ul << LARGE_PAGE_ORDER;
	kvm->arch.ram_porder = LARGE_PAGE_ORDER;
	kvm->arch.rma = NULL;

	kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);

	if (cpu_has_feature(CPU_FTR_ARCH_201)) {
		/* PPC970; HID4 is effectively the LPCR */
		unsigned long lpid = kvm->arch.lpid;
		kvm->arch.host_lpid = 0;
		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
		lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
		lpcr |= ((lpid >> 4) << HID4_LPID1_SH) |
			((lpid & 0xf) << HID4_LPID5_SH);
	} else {
		/* POWER7; init LPCR for virtual RMA mode */
		kvm->arch.host_lpid = mfspr(SPRN_LPID);
		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
		lpcr &= LPCR_PECE | LPCR_LPES;
		lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
			LPCR_VPM0 | LPCR_VRMA_L;
	}
	kvm->arch.lpcr = lpcr;

	return 0;
}

void kvmppc_core_destroy_vm(struct kvm *kvm)
{
	unsigned long i;

	for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
		unpin_slot(kvm, i);

	if (kvm->arch.rma) {
		kvm_release_rma(kvm->arch.rma);
		kvm->arch.rma = NULL;
	}

	kvmppc_free_hpt(kvm);
	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
}

/* These are stubs for now */
void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
{
}

/* We don't need to emulate any privileged instructions or dcbz */
int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
			   unsigned int inst, int *advance)
{
	return EMULATE_FAIL;
}

int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
{
	return EMULATE_FAIL;
}

int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
{
	return EMULATE_FAIL;
}

static int kvmppc_book3s_hv_init(void)
{
	int r;

	r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);

	if (r)
		return r;

	r = kvmppc_mmu_hv_init();

	return r;
}

static void kvmppc_book3s_hv_exit(void)
{
	kvm_exit();
}

module_init(kvmppc_book3s_hv_init);
module_exit(kvmppc_book3s_hv_exit);