/*
 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
 *
 * Authors:
 *    Paul Mackerras <paulus@au1.ibm.com>
 *    Alexander Graf <agraf@suse.de>
 *    Kevin Wolf <mail@kevin-wolf.de>
 *
 * Description: KVM functions specific to running on Book 3S
 * processors in hypervisor mode (specifically POWER7 and later).
 *
 * This file is derived from arch/powerpc/kvm/book3s.c,
 * by Alexander Graf <agraf@suse.de>.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */

#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/preempt.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <linux/srcu.h>

#include <asm/reg.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/lppaca.h>
#include <asm/processor.h>
#include <asm/cputhreads.h>
#include <asm/page.h>
#include <asm/hvcall.h>
#include <asm/switch_to.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>

/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
/* #define EXIT_DEBUG_INT */

static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);

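/*
 * Called when a vcpu task is scheduled onto a host CPU: record the vcpu
 * and its virtual core in this CPU's paca, and close off the interval of
 * "stolen" timebase ticks if this vcpu is the vcore's runner.
 */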
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	local_paca->kvm_hstate.kvm_vcpu = vcpu;
	local_paca->kvm_hstate.kvm_vcore = vc;
	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
		vc->stolen_tb += mftb() - vc->preempt_tb;
}

void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
		vc->preempt_tb = mftb();
}

void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
{
	vcpu->arch.shregs.msr = msr;
	kvmppc_end_cede(vcpu);
}

void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
{
	vcpu->arch.pvr = pvr;
}

void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
{
	int r;

	pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
	pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
	       vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap);
	for (r = 0; r < 16; ++r)
		pr_err("r%2d = %.16lx r%d = %.16lx\n",
		       r, kvmppc_get_gpr(vcpu, r),
		       r+16, kvmppc_get_gpr(vcpu, r+16));
	pr_err("ctr = %.16lx lr = %.16lx\n",
	       vcpu->arch.ctr, vcpu->arch.lr);
	pr_err("srr0 = %.16llx srr1 = %.16llx\n",
	       vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
	pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
	       vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
	pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
	       vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
	pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n",
	       vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
	pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
	pr_err("fault dar = %.16lx dsisr = %.8x\n",
	       vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
	pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
	for (r = 0; r < vcpu->arch.slb_max; ++r)
		pr_err("  ESID = %.16llx VSID = %.16llx\n",
		       vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
	pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
	       vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1,
	       vcpu->arch.last_inst);
}

struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
{
	int r;
	struct kvm_vcpu *v, *ret = NULL;

	mutex_lock(&kvm->lock);
	kvm_for_each_vcpu(r, v, kvm) {
		if (v->vcpu_id == id) {
			ret = v;
			break;
		}
	}
	mutex_unlock(&kvm->lock);
	return ret;
}

static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
{
	vpa->shared_proc = 1;
	vpa->yield_count = 1;
}

/* Length for a per-processor buffer is passed in at offset 4 in the buffer */
struct reg_vpa {
	u32 dummy;
	union {
		u16 hword;
		u32 word;
	} length;
};

static int vpa_is_registered(struct kvmppc_vpa *vpap)
{
	if (vpap->update_pending)
		return vpap->next_gpa != 0;
	return vpap->pinned_addr != NULL;
}

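/*
 * Handle the H_REGISTER_VPA hcall for a guest vcpu: validate the
 * (de)registration subfunction and the supplied address, then record the
 * new guest physical address and length so that the area can be pinned
 * or unpinned later by kvmppc_update_vpas().
 */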
static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
				       unsigned long flags,
				       unsigned long vcpuid, unsigned long vpa)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long len, nb;
	void *va;
	struct kvm_vcpu *tvcpu;
	int err;
	int subfunc;
	struct kvmppc_vpa *vpap;

	tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
	if (!tvcpu)
		return H_PARAMETER;

	subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK;
	if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL ||
	    subfunc == H_VPA_REG_SLB) {
		/* Registering new area - address must be cache-line aligned */
		if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa)
			return H_PARAMETER;

		/* convert logical addr to kernel addr and read length */
		va = kvmppc_pin_guest_page(kvm, vpa, &nb);
		if (va == NULL)
			return H_PARAMETER;
		if (subfunc == H_VPA_REG_VPA)
			len = ((struct reg_vpa *)va)->length.hword;
		else
			len = ((struct reg_vpa *)va)->length.word;
		kvmppc_unpin_guest_page(kvm, va);

		/* Check length */
		if (len > nb || len < sizeof(struct reg_vpa))
			return H_PARAMETER;
	} else {
		vpa = 0;
		len = 0;
	}

	err = H_PARAMETER;
	vpap = NULL;
	spin_lock(&tvcpu->arch.vpa_update_lock);

	switch (subfunc) {
	case H_VPA_REG_VPA:		/* register VPA */
		if (len < sizeof(struct lppaca))
			break;
		vpap = &tvcpu->arch.vpa;
		err = 0;
		break;

	case H_VPA_REG_DTL:		/* register DTL */
		if (len < sizeof(struct dtl_entry))
			break;
		len -= len % sizeof(struct dtl_entry);

		/* Check that they have previously registered a VPA */
		err = H_RESOURCE;
		if (!vpa_is_registered(&tvcpu->arch.vpa))
			break;

		vpap = &tvcpu->arch.dtl;
		err = 0;
		break;

	case H_VPA_REG_SLB:		/* register SLB shadow buffer */
		/* Check that they have previously registered a VPA */
		err = H_RESOURCE;
		if (!vpa_is_registered(&tvcpu->arch.vpa))
			break;

		vpap = &tvcpu->arch.slb_shadow;
		err = 0;
		break;

	case H_VPA_DEREG_VPA:		/* deregister VPA */
		/* Check they don't still have a DTL or SLB buf registered */
		err = H_RESOURCE;
		if (vpa_is_registered(&tvcpu->arch.dtl) ||
		    vpa_is_registered(&tvcpu->arch.slb_shadow))
			break;

		vpap = &tvcpu->arch.vpa;
		err = 0;
		break;

	case H_VPA_DEREG_DTL:		/* deregister DTL */
		vpap = &tvcpu->arch.dtl;
		err = 0;
		break;

	case H_VPA_DEREG_SLB:		/* deregister SLB shadow buffer */
		vpap = &tvcpu->arch.slb_shadow;
		err = 0;
		break;
	}

	if (vpap) {
		vpap->next_gpa = vpa;
		vpap->len = len;
		vpap->update_pending = 1;
	}

	spin_unlock(&tvcpu->arch.vpa_update_lock);

	return err;
}

static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
{
	struct kvm *kvm = vcpu->kvm;
	void *va;
	unsigned long nb;
	unsigned long gpa;

	/*
	 * We need to pin the page pointed to by vpap->next_gpa,
	 * but we can't call kvmppc_pin_guest_page under the lock
	 * as it does get_user_pages() and down_read().  So we
	 * have to drop the lock, pin the page, then get the lock
	 * again and check that a new area didn't get registered
	 * in the meantime.
	 */
	for (;;) {
		gpa = vpap->next_gpa;
		spin_unlock(&vcpu->arch.vpa_update_lock);
		va = NULL;
		nb = 0;
		if (gpa)
			va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
		spin_lock(&vcpu->arch.vpa_update_lock);
		if (gpa == vpap->next_gpa)
			break;
		/* sigh... unpin that one and try again */
		if (va)
			kvmppc_unpin_guest_page(kvm, va);
	}

	vpap->update_pending = 0;
	if (va && nb < vpap->len) {
		/*
		 * If it's now too short, it must be that userspace
		 * has changed the mappings underlying guest memory,
		 * so unregister the region.
		 */
		kvmppc_unpin_guest_page(kvm, va);
		va = NULL;
	}
	if (vpap->pinned_addr)
		kvmppc_unpin_guest_page(kvm, vpap->pinned_addr);
	vpap->pinned_addr = va;
	if (va)
		vpap->pinned_end = va + vpap->len;
}

static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
{
	spin_lock(&vcpu->arch.vpa_update_lock);
	if (vcpu->arch.vpa.update_pending) {
		kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
		init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
	}
	if (vcpu->arch.dtl.update_pending) {
		kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
		vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
		vcpu->arch.dtl_index = 0;
	}
	if (vcpu->arch.slb_shadow.update_pending)
		kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow);
	spin_unlock(&vcpu->arch.vpa_update_lock);
}

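/*
 * Write one entry into the guest's dispatch trace log, if it has
 * registered one, recording where and when this vcpu is being dispatched
 * and how much "stolen" time has accumulated since the last dispatch.
 */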
static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
				    struct kvmppc_vcore *vc)
{
	struct dtl_entry *dt;
	struct lppaca *vpa;
	unsigned long old_stolen;

	dt = vcpu->arch.dtl_ptr;
	vpa = vcpu->arch.vpa.pinned_addr;
	old_stolen = vcpu->arch.stolen_logged;
	vcpu->arch.stolen_logged = vc->stolen_tb;
	if (!dt || !vpa)
		return;
	memset(dt, 0, sizeof(struct dtl_entry));
	dt->dispatch_reason = 7;
	dt->processor_id = vc->pcpu + vcpu->arch.ptid;
	dt->timebase = mftb();
	dt->enqueue_to_dispatch_time = vc->stolen_tb - old_stolen;
	dt->srr0 = kvmppc_get_pc(vcpu);
	dt->srr1 = vcpu->arch.shregs.msr;
	++dt;
	if (dt == vcpu->arch.dtl.pinned_end)
		dt = vcpu->arch.dtl.pinned_addr;
	vcpu->arch.dtl_ptr = dt;
	/* order writing *dt vs. writing vpa->dtl_idx */
	smp_wmb();
	vpa->dtl_idx = ++vcpu->arch.dtl_index;
}

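/*
 * Handle the PAPR hcalls that we emulate in the kernel rather than
 * passing up to userspace.  Returns RESUME_GUEST if the hcall was handled
 * here, or RESUME_HOST to have it forwarded to userspace.
 */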
int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
	unsigned long req = kvmppc_get_gpr(vcpu, 3);
	unsigned long target, ret = H_SUCCESS;
	struct kvm_vcpu *tvcpu;
	int idx;

	switch (req) {
	case H_ENTER:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
					      kvmppc_get_gpr(vcpu, 5),
					      kvmppc_get_gpr(vcpu, 6),
					      kvmppc_get_gpr(vcpu, 7));
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case H_CEDE:
		break;
	case H_PROD:
		target = kvmppc_get_gpr(vcpu, 4);
		tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
		if (!tvcpu) {
			ret = H_PARAMETER;
			break;
		}
		tvcpu->arch.prodded = 1;
		smp_mb();
		if (vcpu->arch.ceded) {
			if (waitqueue_active(&vcpu->wq)) {
				wake_up_interruptible(&vcpu->wq);
				vcpu->stat.halt_wakeup++;
			}
		}
		break;
	case H_CONFER:
		break;
	case H_REGISTER_VPA:
		ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
					kvmppc_get_gpr(vcpu, 5),
					kvmppc_get_gpr(vcpu, 6));
		break;
	default:
		return RESUME_HOST;
	}
	kvmppc_set_gpr(vcpu, 3, ret);
	vcpu->arch.hcall_needed = 0;
	return RESUME_GUEST;
}

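/*
 * Handle a trap that caused the guest to exit: decide whether the guest
 * can simply be resumed or whether the exit must be handled in the host
 * (for example, an hcall that has to be passed to userspace).
 */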
static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
			      struct task_struct *tsk)
{
	int r = RESUME_HOST;
	int srcu_idx;

	vcpu->stat.sum_exits++;

	run->exit_reason = KVM_EXIT_UNKNOWN;
	run->ready_for_interrupt_injection = 1;
	switch (vcpu->arch.trap) {
	/* We're good on these - the host merely wanted to get our attention */
	case BOOK3S_INTERRUPT_HV_DECREMENTER:
		vcpu->stat.dec_exits++;
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_EXTERNAL:
		vcpu->stat.ext_intr_exits++;
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_PERFMON:
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_PROGRAM:
	{
		ulong flags;
		/*
		 * Normally program interrupts are delivered directly
		 * to the guest by the hardware, but we can get here
		 * as a result of a hypervisor emulation interrupt
		 * (e40) getting turned into a 700 by BML RTAS.
		 */
		flags = vcpu->arch.shregs.msr & 0x1f0000ull;
		kvmppc_core_queue_program(vcpu, flags);
		r = RESUME_GUEST;
		break;
	}
	case BOOK3S_INTERRUPT_SYSCALL:
	{
		/* hcall - punt to userspace */
		int i;

		if (vcpu->arch.shregs.msr & MSR_PR) {
			/* sc 1 from userspace - reflect to guest syscall */
			kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_SYSCALL);
			r = RESUME_GUEST;
			break;
		}
		run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
		for (i = 0; i < 9; ++i)
			run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
		run->exit_reason = KVM_EXIT_PAPR_HCALL;
		vcpu->arch.hcall_needed = 1;
		r = RESUME_HOST;
		break;
	}
	/*
	 * We get these next two if the guest accesses a page which it thinks
	 * it has mapped but which is not actually present, either because
	 * it is for an emulated I/O device or because the corresponding
	 * host page has been paged out.  Any other HDSI/HISI interrupts
	 * have been handled already.
	 */
	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvmppc_book3s_hv_page_fault(run, vcpu,
				vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
		break;
	case BOOK3S_INTERRUPT_H_INST_STORAGE:
		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvmppc_book3s_hv_page_fault(run, vcpu,
				kvmppc_get_pc(vcpu), 0);
		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
		break;
	/*
	 * This occurs if the guest executes an illegal instruction.
	 * We just generate a program interrupt to the guest, since
	 * we don't emulate any guest instructions at this stage.
	 */
	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
		kvmppc_core_queue_program(vcpu, 0x80000);
		r = RESUME_GUEST;
		break;
	default:
		kvmppc_dump_regs(vcpu);
		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
			vcpu->arch.trap, kvmppc_get_pc(vcpu),
			vcpu->arch.shregs.msr);
		r = RESUME_HOST;
		BUG();
		break;
	}

	return r;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	int i;

	memset(sregs, 0, sizeof(struct kvm_sregs));
	sregs->pvr = vcpu->arch.pvr;

	for (i = 0; i < vcpu->arch.slb_max; i++) {
		sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
		sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
	}

	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	int i, j;

	kvmppc_set_pvr(vcpu, sregs->pvr);

	j = 0;
	for (i = 0; i < vcpu->arch.slb_nr; i++) {
		if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) {
			vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe;
			vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv;
			++j;
		}
	}
	vcpu->arch.slb_max = j;

	return 0;
}

int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_PPC_HIOR:
		r = put_user(0, (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_PPC_HIOR:
	{
		u64 hior;
		/* Only allow this to be set to zero */
		r = get_user(hior, (u64 __user *)reg->addr);
		if (!r && (hior != 0))
			r = -EINVAL;
		break;
	}
	default:
		break;
	}

	return r;
}

int kvmppc_core_check_processor_compat(void)
{
	if (cpu_has_feature(CPU_FTR_HVMODE))
		return 0;
	return -EIO;
}

struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
	struct kvm_vcpu *vcpu;
	int err = -EINVAL;
	int core;
	struct kvmppc_vcore *vcore;

	core = id / threads_per_core;
	if (core >= KVM_MAX_VCORES)
		goto out;

	err = -ENOMEM;
	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	err = kvm_vcpu_init(vcpu, kvm, id);
	if (err)
		goto free_vcpu;

	vcpu->arch.shared = &vcpu->arch.shregs;
	vcpu->arch.last_cpu = -1;
	vcpu->arch.mmcr[0] = MMCR0_FC;
	vcpu->arch.ctrl = CTRL_RUNLATCH;
	/* default to host PVR, since we can't spoof it */
	vcpu->arch.pvr = mfspr(SPRN_PVR);
	kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
	spin_lock_init(&vcpu->arch.vpa_update_lock);

	kvmppc_mmu_book3s_hv_init(vcpu);

	/*
	 * We consider the vcpu stopped until we see the first run ioctl for it.
	 */
	vcpu->arch.state = KVMPPC_VCPU_STOPPED;

	init_waitqueue_head(&vcpu->arch.cpu_run);

	mutex_lock(&kvm->lock);
	vcore = kvm->arch.vcores[core];
	if (!vcore) {
		vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
		if (vcore) {
			INIT_LIST_HEAD(&vcore->runnable_threads);
			spin_lock_init(&vcore->lock);
			init_waitqueue_head(&vcore->wq);
			vcore->preempt_tb = mftb();
		}
		kvm->arch.vcores[core] = vcore;
	}
	mutex_unlock(&kvm->lock);

	if (!vcore)
		goto free_vcpu;

	spin_lock(&vcore->lock);
	++vcore->num_threads;
	spin_unlock(&vcore->lock);
	vcpu->arch.vcore = vcore;
	vcpu->arch.stolen_logged = vcore->stolen_tb;

	vcpu->arch.cpu_type = KVM_CPU_3S_64;
	kvmppc_sanity_check(vcpu);

	return vcpu;

free_vcpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(err);
}

void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
{
	spin_lock(&vcpu->arch.vpa_update_lock);
	if (vcpu->arch.dtl.pinned_addr)
		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr);
	if (vcpu->arch.slb_shadow.pinned_addr)
		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr);
	if (vcpu->arch.vpa.pinned_addr)
		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
	spin_unlock(&vcpu->arch.vpa_update_lock);
	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
{
	unsigned long dec_nsec, now;

	now = get_tb();
	if (now > vcpu->arch.dec_expires) {
		/* decrementer has already gone negative */
		kvmppc_core_queue_dec(vcpu);
		kvmppc_core_prepare_to_enter(vcpu);
		return;
	}
	dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
		   / tb_ticks_per_sec;
	hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
		      HRTIMER_MODE_REL);
	vcpu->arch.timer_running = 1;
}

static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
{
	vcpu->arch.ceded = 0;
	if (vcpu->arch.timer_running) {
		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
		vcpu->arch.timer_running = 0;
	}
}

extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
extern void xics_wake_cpu(int cpu);

static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
				   struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu *v;

	if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
		return;
	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
	--vc->n_runnable;
	++vc->n_busy;
	/* decrement the physical thread id of each following vcpu */
	v = vcpu;
	list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list)
		--v->arch.ptid;
	list_del(&vcpu->arch.run_list);
}

static int kvmppc_grab_hwthread(int cpu)
{
	struct paca_struct *tpaca;
	long timeout = 1000;

	tpaca = &paca[cpu];

	/* Ensure the thread won't go into the kernel if it wakes */
	tpaca->kvm_hstate.hwthread_req = 1;

	/*
	 * If the thread is already executing in the kernel (e.g. handling
	 * a stray interrupt), wait for it to get back to nap mode.
	 * The smp_mb() is to ensure that our setting of hwthread_req
	 * is visible before we look at hwthread_state, so if this
	 * races with the code at system_reset_pSeries and the thread
	 * misses our setting of hwthread_req, we are sure to see its
	 * setting of hwthread_state, and vice versa.
	 */
	smp_mb();
	while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) {
		if (--timeout <= 0) {
			pr_err("KVM: couldn't grab cpu %d\n", cpu);
			return -EBUSY;
		}
		udelay(1);
	}
	return 0;
}

static void kvmppc_release_hwthread(int cpu)
{
	struct paca_struct *tpaca;

	tpaca = &paca[cpu];
	tpaca->kvm_hstate.hwthread_req = 0;
	tpaca->kvm_hstate.kvm_vcpu = NULL;
}

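/*
 * Load a vcpu onto one of the hardware threads of this physical core;
 * secondary threads are woken out of nap with an IPI so that they pick
 * up the vcpu and enter the guest.
 */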
static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
{
	int cpu;
	struct paca_struct *tpaca;
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	if (vcpu->arch.timer_running) {
		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
		vcpu->arch.timer_running = 0;
	}
	cpu = vc->pcpu + vcpu->arch.ptid;
	tpaca = &paca[cpu];
	tpaca->kvm_hstate.kvm_vcpu = vcpu;
	tpaca->kvm_hstate.kvm_vcore = vc;
	tpaca->kvm_hstate.napping = 0;
	vcpu->cpu = vc->pcpu;
	smp_wmb();
#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
	if (vcpu->arch.ptid) {
		kvmppc_grab_hwthread(cpu);
		xics_wake_cpu(cpu);
		++vc->n_woken;
	}
#endif
}

static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
{
	int i;

	HMT_low();
	i = 0;
	while (vc->nap_count < vc->n_woken) {
		if (++i >= 1000000) {
			pr_err("kvmppc_wait_for_nap timeout %d %d\n",
			       vc->nap_count, vc->n_woken);
			break;
		}
		cpu_relax();
	}
	HMT_medium();
}

/*
 * Check that we are on thread 0 and that any other threads in
 * this core are off-line.
 */
static int on_primary_thread(void)
{
	int cpu = smp_processor_id();
	int thr = cpu_thread_in_core(cpu);

	if (thr)
		return 0;
	while (++thr < threads_per_core)
		if (cpu_online(cpu + thr))
			return 0;
	return 1;
}

/*
 * Run a set of guest threads on a physical core.
 * Called with vc->lock held.
 */
static int kvmppc_run_core(struct kvmppc_vcore *vc)
{
	struct kvm_vcpu *vcpu, *vcpu0, *vnext;
	long ret;
	u64 now;
	int ptid, i, need_vpa_update;
	int srcu_idx;

	/* don't start if any threads have a signal pending */
	need_vpa_update = 0;
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
		if (signal_pending(vcpu->arch.run_task))
			return 0;
		need_vpa_update |= vcpu->arch.vpa.update_pending |
			vcpu->arch.slb_shadow.update_pending |
			vcpu->arch.dtl.update_pending;
	}

	/*
	 * Initialize *vc, in particular vc->vcore_state, so we can
	 * drop the vcore lock if necessary.
	 */
	vc->n_woken = 0;
	vc->nap_count = 0;
	vc->entry_exit_count = 0;
	vc->vcore_state = VCORE_RUNNING;
	vc->in_guest = 0;
	vc->napping_threads = 0;

	/*
	 * Updating any of the vpas requires calling kvmppc_pin_guest_page,
	 * which can't be called with any spinlocks held.
	 */
	if (need_vpa_update) {
		spin_unlock(&vc->lock);
		list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
			kvmppc_update_vpas(vcpu);
		spin_lock(&vc->lock);
	}

	/*
	 * Make sure we are running on thread 0, and that
	 * secondary threads are offline.
	 * XXX we should also block attempts to bring any
	 * secondary threads online.
	 */
	if (threads_per_core > 1 && !on_primary_thread()) {
		list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
			vcpu->arch.ret = -EBUSY;
		goto out;
	}

	/*
	 * Assign physical thread IDs, first to non-ceded vcpus
	 * and then to ceded ones.
	 */
	ptid = 0;
	vcpu0 = NULL;
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
		if (!vcpu->arch.ceded) {
			if (!ptid)
				vcpu0 = vcpu;
			vcpu->arch.ptid = ptid++;
		}
	}
	if (!vcpu0)
		return 0;		/* nothing to run */
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
		if (vcpu->arch.ceded)
			vcpu->arch.ptid = ptid++;

	vc->stolen_tb += mftb() - vc->preempt_tb;
	vc->pcpu = smp_processor_id();
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
		kvmppc_start_thread(vcpu);
		kvmppc_create_dtl_entry(vcpu, vc);
	}
	/* Grab any remaining hw threads so they can't go into the kernel */
	for (i = ptid; i < threads_per_core; ++i)
		kvmppc_grab_hwthread(vc->pcpu + i);

	preempt_disable();
	spin_unlock(&vc->lock);

	kvm_guest_enter();

	srcu_idx = srcu_read_lock(&vcpu0->kvm->srcu);

	__kvmppc_vcore_entry(NULL, vcpu0);
	for (i = 0; i < threads_per_core; ++i)
		kvmppc_release_hwthread(vc->pcpu + i);

	spin_lock(&vc->lock);
	/* disable sending of IPIs on virtual external irqs */
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
		vcpu->cpu = -1;
	/* wait for secondary threads to finish writing their state to memory */
	if (vc->nap_count < vc->n_woken)
		kvmppc_wait_for_nap(vc);
	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
	vc->vcore_state = VCORE_EXITING;
	spin_unlock(&vc->lock);

	srcu_read_unlock(&vcpu0->kvm->srcu, srcu_idx);

	/* make sure updates to secondary vcpu structs are visible now */
	smp_mb();
	kvm_guest_exit();

	preempt_enable();
	kvm_resched(vcpu);

	now = get_tb();
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
		/* cancel pending dec exception if dec is positive */
		if (now < vcpu->arch.dec_expires &&
		    kvmppc_core_pending_dec(vcpu))
			kvmppc_core_dequeue_dec(vcpu);

		ret = RESUME_GUEST;
		if (vcpu->arch.trap)
			ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
						 vcpu->arch.run_task);

		vcpu->arch.ret = ret;
		vcpu->arch.trap = 0;

		if (vcpu->arch.ceded) {
			if (ret != RESUME_GUEST)
				kvmppc_end_cede(vcpu);
			else
				kvmppc_set_timer(vcpu);
		}
	}

	spin_lock(&vc->lock);
 out:
	vc->vcore_state = VCORE_INACTIVE;
	vc->preempt_tb = mftb();
	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
				 arch.run_list) {
		if (vcpu->arch.ret != RESUME_GUEST) {
			kvmppc_remove_runnable(vc, vcpu);
			wake_up(&vcpu->arch.cpu_run);
		}
	}

	return 1;
}

/*
 * Wait for some other vcpu thread to execute us, and
 * wake us up when we need to handle something in the host.
 */
static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
		schedule();
	finish_wait(&vcpu->arch.cpu_run, &wait);
}

/*
 * All the vcpus in this vcore are idle, so wait for a decrementer
 * or external interrupt to one of the vcpus.  vc->lock is held.
 */
static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
{
	DEFINE_WAIT(wait);
	struct kvm_vcpu *v;
	int all_idle = 1;

	prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
	vc->vcore_state = VCORE_SLEEPING;
	spin_unlock(&vc->lock);
	list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
		if (!v->arch.ceded || v->arch.pending_exceptions) {
			all_idle = 0;
			break;
		}
	}
	if (all_idle)
		schedule();
	finish_wait(&vc->wq, &wait);
	spin_lock(&vc->lock);
	vc->vcore_state = VCORE_INACTIVE;
}

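/*
 * Add this vcpu to its virtual core's list of runnable threads and either
 * run the whole core (if we become the designated runner) or wait for
 * another vcpu task to run it, looping until this vcpu is no longer
 * runnable or a signal is pending.
 */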
static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
	int n_ceded;
	int prev_state;
	struct kvmppc_vcore *vc;
	struct kvm_vcpu *v, *vn;

	kvm_run->exit_reason = 0;
	vcpu->arch.ret = RESUME_GUEST;
	vcpu->arch.trap = 0;

	/*
	 * Synchronize with other threads in this virtual core
	 */
	vc = vcpu->arch.vcore;
	spin_lock(&vc->lock);
	vcpu->arch.ceded = 0;
	vcpu->arch.run_task = current;
	vcpu->arch.kvm_run = kvm_run;
	prev_state = vcpu->arch.state;
	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
	list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
	++vc->n_runnable;

	/*
	 * This happens the first time this is called for a vcpu.
	 * If the vcore is already running, we may be able to start
	 * this thread straight away and have it join in.
	 */
	if (prev_state == KVMPPC_VCPU_STOPPED) {
		if (vc->vcore_state == VCORE_RUNNING &&
		    VCORE_EXIT_COUNT(vc) == 0) {
			vcpu->arch.ptid = vc->n_runnable - 1;
			kvmppc_start_thread(vcpu);
		}

	} else if (prev_state == KVMPPC_VCPU_BUSY_IN_HOST)
		--vc->n_busy;

	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
	       !signal_pending(current)) {
		if (vc->n_busy || vc->vcore_state != VCORE_INACTIVE) {
			spin_unlock(&vc->lock);
			kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
			spin_lock(&vc->lock);
			continue;
		}
		vc->runner = vcpu;
		n_ceded = 0;
		list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
			n_ceded += v->arch.ceded;
		if (n_ceded == vc->n_runnable)
			kvmppc_vcore_blocked(vc);
		else
			kvmppc_run_core(vc);

		list_for_each_entry_safe(v, vn, &vc->runnable_threads,
					 arch.run_list) {
			kvmppc_core_prepare_to_enter(v);
			if (signal_pending(v->arch.run_task)) {
				kvmppc_remove_runnable(vc, v);
				v->stat.signal_exits++;
				v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
				v->arch.ret = -EINTR;
				wake_up(&v->arch.cpu_run);
			}
		}
		vc->runner = NULL;
	}

	if (signal_pending(current)) {
		if (vc->vcore_state == VCORE_RUNNING ||
		    vc->vcore_state == VCORE_EXITING) {
			spin_unlock(&vc->lock);
			kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
			spin_lock(&vc->lock);
		}
		if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
			kvmppc_remove_runnable(vc, vcpu);
			vcpu->stat.signal_exits++;
			kvm_run->exit_reason = KVM_EXIT_INTR;
			vcpu->arch.ret = -EINTR;
		}
	}

	spin_unlock(&vc->lock);
	return vcpu->arch.ret;
}

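/*
 * Entry point from the generic KVM_RUN ioctl: perform one-time setup of
 * the hashed page table and real-mode area if needed, then run the vcpu,
 * handling any in-kernel hcalls, until the guest has to exit to userspace.
 */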
int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
	int r;

	if (!vcpu->arch.sane) {
		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		return -EINVAL;
	}

	kvmppc_core_prepare_to_enter(vcpu);

	/* No need to go into the guest when all we'll do is come back out */
	if (signal_pending(current)) {
		run->exit_reason = KVM_EXIT_INTR;
		return -EINTR;
	}

	atomic_inc(&vcpu->kvm->arch.vcpus_running);
	/* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
	smp_mb();

	/* On the first time here, set up HTAB and VRMA or RMA */
	if (!vcpu->kvm->arch.rma_setup_done) {
		r = kvmppc_hv_setup_htab_rma(vcpu);
		if (r)
			goto out;
	}

	flush_fp_to_thread(current);
	flush_altivec_to_thread(current);
	flush_vsx_to_thread(current);
	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
	vcpu->arch.pgdir = current->mm->pgd;

	do {
		r = kvmppc_run_vcpu(run, vcpu);

		if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
		    !(vcpu->arch.shregs.msr & MSR_PR)) {
			r = kvmppc_pseries_do_hcall(vcpu);
			kvmppc_core_prepare_to_enter(vcpu);
		}
	} while (r == RESUME_GUEST);

 out:
	atomic_dec(&vcpu->kvm->arch.vcpus_running);
	return r;
}


/* Work out RMLS (real mode limit selector) field value for a given RMA size.
   Assumes POWER7 or PPC970. */
static inline int lpcr_rmls(unsigned long rma_size)
{
	switch (rma_size) {
	case 32ul << 20:	/* 32 MB */
		if (cpu_has_feature(CPU_FTR_ARCH_206))
			return 8;	/* only supported on POWER7 */
		return -1;
	case 64ul << 20:	/* 64 MB */
		return 3;
	case 128ul << 20:	/* 128 MB */
		return 7;
	case 256ul << 20:	/* 256 MB */
		return 4;
	case 1ul << 30:		/* 1 GB */
		return 2;
	case 16ul << 30:	/* 16 GB */
		return 1;
	case 256ul << 30:	/* 256 GB */
		return 0;
	default:
		return -1;
	}
}

static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
	struct page *page;

	if (vmf->pgoff >= ri->npages)
		return VM_FAULT_SIGBUS;

	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
	get_page(page);
	vmf->page = page;
	return 0;
}

static const struct vm_operations_struct kvm_rma_vm_ops = {
	.fault = kvm_rma_fault,
};

static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_flags |= VM_RESERVED;
	vma->vm_ops = &kvm_rma_vm_ops;
	return 0;
}

static int kvm_rma_release(struct inode *inode, struct file *filp)
{
	struct kvmppc_linear_info *ri = filp->private_data;

	kvm_release_rma(ri);
	return 0;
}

static struct file_operations kvm_rma_fops = {
	.mmap		= kvm_rma_mmap,
	.release	= kvm_rma_release,
};

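/*
 * Allocate one of the preallocated real mode areas (RMAs) and return a
 * file descriptor that userspace can mmap to get access to it.
 */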
long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
{
	struct kvmppc_linear_info *ri;
	long fd;

	ri = kvm_alloc_rma();
	if (!ri)
		return -ENOMEM;

	fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR);
	if (fd < 0)
		kvm_release_rma(ri);

	ret->rma_size = ri->npages << PAGE_SHIFT;
	return fd;
}

static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
				     int linux_psize)
{
	struct mmu_psize_def *def = &mmu_psize_defs[linux_psize];

	if (!def->shift)
		return;
	(*sps)->page_shift = def->shift;
	(*sps)->slb_enc = def->sllp;
	(*sps)->enc[0].page_shift = def->shift;
	(*sps)->enc[0].pte_enc = def->penc;
	(*sps)++;
}

int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
{
	struct kvm_ppc_one_seg_page_size *sps;

	info->flags = KVM_PPC_PAGE_SIZES_REAL;
	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
		info->flags |= KVM_PPC_1T_SEGMENTS;
	info->slb_size = mmu_slb_size;

	/* We only support these sizes for now, and no multi-size segments */
	sps = &info->sps[0];
	kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K);
	kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K);
	kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M);

	return 0;
}

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
	struct kvm_memory_slot *memslot;
	int r;
	unsigned long n;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_MEMORY_SLOTS)
		goto out;

	memslot = id_to_memslot(kvm->memslots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	n = kvm_dirty_bitmap_bytes(memslot);
	memset(memslot->dirty_bitmap, 0, n);

	r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap);
	if (r)
		goto out;

	r = -EFAULT;
	if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
		goto out;

	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static unsigned long slb_pgsize_encoding(unsigned long psize)
{
	unsigned long senc = 0;

	if (psize > 0x1000) {
		senc = SLB_VSID_L;
		if (psize == 0x10000)
			senc |= SLB_VSID_LP_01;
	}
	return senc;
}

static void unpin_slot(struct kvm_memory_slot *memslot)
{
	unsigned long *physp;
	unsigned long j, npages, pfn;
	struct page *page;

	physp = memslot->arch.slot_phys;
	npages = memslot->npages;
	if (!physp)
		return;
	for (j = 0; j < npages; j++) {
		if (!(physp[j] & KVMPPC_GOT_PAGE))
			continue;
		pfn = physp[j] >> PAGE_SHIFT;
		page = pfn_to_page(pfn);
		SetPageDirty(page);
		put_page(page);
	}
}

void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
			      struct kvm_memory_slot *dont)
{
	if (!dont || free->arch.rmap != dont->arch.rmap) {
		vfree(free->arch.rmap);
		free->arch.rmap = NULL;
	}
	if (!dont || free->arch.slot_phys != dont->arch.slot_phys) {
		unpin_slot(free);
		vfree(free->arch.slot_phys);
		free->arch.slot_phys = NULL;
	}
}

int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
			       unsigned long npages)
{
	slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
	if (!slot->arch.rmap)
		return -ENOMEM;
	slot->arch.slot_phys = NULL;

	return 0;
}

int kvmppc_core_prepare_memory_region(struct kvm *kvm,
				      struct kvm_memory_slot *memslot,
				      struct kvm_userspace_memory_region *mem)
{
	unsigned long *phys;

	/* Allocate a slot_phys array if needed */
	phys = memslot->arch.slot_phys;
	if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) {
		phys = vzalloc(memslot->npages * sizeof(unsigned long));
		if (!phys)
			return -ENOMEM;
		memslot->arch.slot_phys = phys;
	}

	return 0;
}

void kvmppc_core_commit_memory_region(struct kvm *kvm,
				      struct kvm_userspace_memory_region *mem,
				      struct kvm_memory_slot old)
{
	unsigned long npages = mem->memory_size >> PAGE_SHIFT;
	struct kvm_memory_slot *memslot;

	if (npages && old.npages) {
		/*
		 * If modifying a memslot, reset all the rmap dirty bits.
		 * If this is a new memslot, we don't need to do anything
		 * since the rmap array starts out as all zeroes,
		 * i.e. no pages are dirty.
		 */
		memslot = id_to_memslot(kvm->memslots, mem->slot);
		kvmppc_hv_get_dirty_log(kvm, memslot, NULL);
	}
}

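/*
 * One-time setup done on the first VCPU_RUN for a VM: allocate the hashed
 * page table if necessary, then set up either the virtual RMA (POWER7
 * only) or a preallocated real-mode area to back guest real address 0.
 */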
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
{
	int err = 0;
	struct kvm *kvm = vcpu->kvm;
	struct kvmppc_linear_info *ri = NULL;
	unsigned long hva;
	struct kvm_memory_slot *memslot;
	struct vm_area_struct *vma;
	unsigned long lpcr, senc;
	unsigned long psize, porder;
	unsigned long rma_size;
1411 unsigned long *physp;
	unsigned long *physp;
	unsigned long i, npages;
	int srcu_idx;

	mutex_lock(&kvm->lock);
	if (kvm->arch.rma_setup_done)
		goto out;	/* another vcpu beat us to it */

	/* Allocate hashed page table (if not done already) and reset it */
	if (!kvm->arch.hpt_virt) {
		err = kvmppc_alloc_hpt(kvm, NULL);
		if (err) {
			pr_err("KVM: Couldn't alloc HPT\n");
			goto out;
		}
	}

	/* Look up the memslot for guest physical address 0 */
	srcu_idx = srcu_read_lock(&kvm->srcu);
	memslot = gfn_to_memslot(kvm, 0);

	/* We must have some memory at 0 by now */
	err = -EINVAL;
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
		goto out_srcu;

	/* Look up the VMA for the start of this memory slot */
	hva = memslot->userspace_addr;
	down_read(&current->mm->mmap_sem);
	vma = find_vma(current->mm, hva);
	if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
		goto up_out;

	psize = vma_kernel_pagesize(vma);
	porder = __ilog2(psize);

	/* Is this one of our preallocated RMAs? */
	if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
	    hva == vma->vm_start)
		ri = vma->vm_file->private_data;

	up_read(&current->mm->mmap_sem);

	if (!ri) {
		/* On POWER7, use VRMA; on PPC970, give up */
		err = -EPERM;
		if (cpu_has_feature(CPU_FTR_ARCH_201)) {
			pr_err("KVM: CPU requires an RMO\n");
			goto out_srcu;
		}

		/* We can handle 4k, 64k or 16M pages in the VRMA */
		err = -EINVAL;
		if (!(psize == 0x1000 || psize == 0x10000 ||
		      psize == 0x1000000))
			goto out_srcu;

		/* Update VRMASD field in the LPCR */
		senc = slb_pgsize_encoding(psize);
		kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
			(VRMA_VSID << SLB_VSID_SHIFT_1T);
		lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
		lpcr |= senc << (LPCR_VRMASD_SH - 4);
		kvm->arch.lpcr = lpcr;

		/* Create HPTEs in the hash page table for the VRMA */
		kvmppc_map_vrma(vcpu, memslot, porder);

	} else {
		/* Set up to use an RMO region */
		rma_size = ri->npages;
		if (rma_size > memslot->npages)
			rma_size = memslot->npages;
		rma_size <<= PAGE_SHIFT;
		rmls = lpcr_rmls(rma_size);
		err = -EINVAL;
		if (rmls < 0) {
			pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
			goto out_srcu;
		}
		atomic_inc(&ri->use_count);
		kvm->arch.rma = ri;

		/* Update LPCR and RMOR */
		lpcr = kvm->arch.lpcr;
		if (cpu_has_feature(CPU_FTR_ARCH_201)) {
			/* PPC970; insert RMLS value (split field) in HID4 */
			lpcr &= ~((1ul << HID4_RMLS0_SH) |
				  (3ul << HID4_RMLS2_SH));
			lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) |
				((rmls & 3) << HID4_RMLS2_SH);
			/* RMOR is also in HID4 */
			lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
				<< HID4_RMOR_SH;
		} else {
			/* POWER7 */
			lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
			lpcr |= rmls << LPCR_RMLS_SH;
			kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
		}
		kvm->arch.lpcr = lpcr;
		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
			ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);

		/* Initialize phys addrs of pages in RMO */
		npages = ri->npages;
		porder = __ilog2(npages);
		physp = memslot->arch.slot_phys;
		if (physp) {
			if (npages > memslot->npages)
				npages = memslot->npages;
			spin_lock(&kvm->arch.slot_phys_lock);
			for (i = 0; i < npages; ++i)
				physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) +
					porder;
			spin_unlock(&kvm->arch.slot_phys_lock);
		}
	}

	/* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
	smp_wmb();
	kvm->arch.rma_setup_done = 1;
	err = 0;
 out_srcu:
	srcu_read_unlock(&kvm->srcu, srcu_idx);
 out:
	mutex_unlock(&kvm->lock);
	return err;

 up_out:
	up_read(&current->mm->mmap_sem);
	goto out;
}

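/*
 * Called when a new VM is created: allocate an LPID for the guest and set
 * up the initial LPCR (or HID4 image on PPC970).
 */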
int kvmppc_core_init_vm(struct kvm *kvm)
{
	unsigned long lpcr;
	long lpid;

	/* Allocate the guest's logical partition ID */

	lpid = kvmppc_alloc_lpid();
	if (lpid < 0)
		return -ENOMEM;
	kvm->arch.lpid = lpid;

	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);

	kvm->arch.rma = NULL;

	kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);

	if (cpu_has_feature(CPU_FTR_ARCH_201)) {
		/* PPC970; HID4 is effectively the LPCR */
		kvm->arch.host_lpid = 0;
		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
		lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
		lpcr |= ((lpid >> 4) << HID4_LPID1_SH) |
			((lpid & 0xf) << HID4_LPID5_SH);
	} else {
		/* POWER7; init LPCR for virtual RMA mode */
		kvm->arch.host_lpid = mfspr(SPRN_LPID);
		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
		lpcr &= LPCR_PECE | LPCR_LPES;
		lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
			LPCR_VPM0 | LPCR_VPM1;
		kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
			(VRMA_VSID << SLB_VSID_SHIFT_1T);
	}
	kvm->arch.lpcr = lpcr;

	kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
	spin_lock_init(&kvm->arch.slot_phys_lock);
	return 0;
}

void kvmppc_core_destroy_vm(struct kvm *kvm)
{
	if (kvm->arch.rma) {
		kvm_release_rma(kvm->arch.rma);
		kvm->arch.rma = NULL;
	}

	kvmppc_free_hpt(kvm);
	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
}

/* These are stubs for now */
void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
{
}

/* We don't need to emulate any privileged instructions or dcbz */
int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
			   unsigned int inst, int *advance)
{
	return EMULATE_FAIL;
}

int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
	return EMULATE_FAIL;
}

int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
{
	return EMULATE_FAIL;
}

static int kvmppc_book3s_hv_init(void)
{
	int r;

	r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);

	if (r)
		return r;

	r = kvmppc_mmu_hv_init();

	return r;
}

static void kvmppc_book3s_hv_exit(void)
{
	kvm_exit();
}

module_init(kvmppc_book3s_hv_init);
module_exit(kvmppc_book3s_hv_exit);