blob: 1cc6b77fa63d9975f6b153f1d2234e4ff38ef8f9 [file] [log] [blame]
/*
 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
 *
 * Authors:
 *    Paul Mackerras <paulus@au1.ibm.com>
 *    Alexander Graf <agraf@suse.de>
 *    Kevin Wolf <mail@kevin-wolf.de>
 *
 * Description: KVM functions specific to running on Book 3S
 * processors in hypervisor mode (specifically POWER7 and later).
 *
 * This file is derived from arch/powerpc/kvm/book3s.c,
 * by Alexander Graf <agraf@suse.de>.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */
20
21#include <linux/kvm_host.h>
22#include <linux/err.h>
23#include <linux/slab.h>
24#include <linux/preempt.h>
25#include <linux/sched.h>
26#include <linux/delay.h>
Paul Gortmaker66b15db2011-05-27 10:46:24 -040027#include <linux/export.h>
Paul Mackerrasde56a942011-06-29 00:21:34 +000028#include <linux/fs.h>
29#include <linux/anon_inodes.h>
30#include <linux/cpumask.h>
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +000031#include <linux/spinlock.h>
32#include <linux/page-flags.h>
Paul Mackerras2c9097e2012-09-11 13:27:01 +000033#include <linux/srcu.h>
Paul Mackerrasde56a942011-06-29 00:21:34 +000034
35#include <asm/reg.h>
36#include <asm/cputable.h>
37#include <asm/cacheflush.h>
38#include <asm/tlbflush.h>
39#include <asm/uaccess.h>
40#include <asm/io.h>
41#include <asm/kvm_ppc.h>
42#include <asm/kvm_book3s.h>
43#include <asm/mmu_context.h>
44#include <asm/lppaca.h>
45#include <asm/processor.h>
Paul Mackerras371fefd2011-06-29 00:23:08 +000046#include <asm/cputhreads.h>
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +000047#include <asm/page.h>
Michael Neulingde1d9242011-11-09 20:39:49 +000048#include <asm/hvcall.h>
David Howellsae3a1972012-03-28 18:30:02 +010049#include <asm/switch_to.h>
Paul Mackerrasde56a942011-06-29 00:21:34 +000050#include <linux/gfp.h>
Paul Mackerrasde56a942011-06-29 00:21:34 +000051#include <linux/vmalloc.h>
52#include <linux/highmem.h>
Paul Mackerrasc77162d2011-12-12 12:31:00 +000053#include <linux/hugetlb.h>
Paul Mackerrasde56a942011-06-29 00:21:34 +000054
55/* #define EXIT_DEBUG */
56/* #define EXIT_DEBUG_SIMPLE */
57/* #define EXIT_DEBUG_INT */
58
Paul Mackerras19ccb762011-07-23 17:42:46 +100059static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
Paul Mackerras32fad282012-05-04 02:32:53 +000060static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
Paul Mackerras19ccb762011-07-23 17:42:46 +100061
Paul Mackerrasde56a942011-06-29 00:21:34 +000062void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
63{
Paul Mackerras0456ec42012-02-03 00:56:21 +000064 struct kvmppc_vcore *vc = vcpu->arch.vcore;
65
Paul Mackerrasde56a942011-06-29 00:21:34 +000066 local_paca->kvm_hstate.kvm_vcpu = vcpu;
Paul Mackerras0456ec42012-02-03 00:56:21 +000067 local_paca->kvm_hstate.kvm_vcore = vc;
68 if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
69 vc->stolen_tb += mftb() - vc->preempt_tb;
Paul Mackerrasde56a942011-06-29 00:21:34 +000070}
71
72void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
73{
Paul Mackerras0456ec42012-02-03 00:56:21 +000074 struct kvmppc_vcore *vc = vcpu->arch.vcore;
75
76 if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
77 vc->preempt_tb = mftb();
Paul Mackerrasde56a942011-06-29 00:21:34 +000078}
79
Paul Mackerrasde56a942011-06-29 00:21:34 +000080void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
81{
82 vcpu->arch.shregs.msr = msr;
Paul Mackerras19ccb762011-07-23 17:42:46 +100083 kvmppc_end_cede(vcpu);
Paul Mackerrasde56a942011-06-29 00:21:34 +000084}
85
86void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
87{
88 vcpu->arch.pvr = pvr;
89}
90
91void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
92{
93 int r;
94
95 pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
96 pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
97 vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap);
98 for (r = 0; r < 16; ++r)
99 pr_err("r%2d = %.16lx r%d = %.16lx\n",
100 r, kvmppc_get_gpr(vcpu, r),
101 r+16, kvmppc_get_gpr(vcpu, r+16));
102 pr_err("ctr = %.16lx lr = %.16lx\n",
103 vcpu->arch.ctr, vcpu->arch.lr);
104 pr_err("srr0 = %.16llx srr1 = %.16llx\n",
105 vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
106 pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
107 vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
108 pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
109 vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
110 pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n",
111 vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
112 pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
113 pr_err("fault dar = %.16lx dsisr = %.8x\n",
114 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
115 pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
116 for (r = 0; r < vcpu->arch.slb_max; ++r)
117 pr_err(" ESID = %.16llx VSID = %.16llx\n",
118 vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
119 pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +0000120 vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1,
Paul Mackerrasde56a942011-06-29 00:21:34 +0000121 vcpu->arch.last_inst);
122}
123
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000124struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
125{
126 int r;
127 struct kvm_vcpu *v, *ret = NULL;
128
129 mutex_lock(&kvm->lock);
130 kvm_for_each_vcpu(r, v, kvm) {
131 if (v->vcpu_id == id) {
132 ret = v;
133 break;
134 }
135 }
136 mutex_unlock(&kvm->lock);
137 return ret;
138}
139
140static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
141{
142 vpa->shared_proc = 1;
143 vpa->yield_count = 1;
144}
145
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000146/* Length for a per-processor buffer is passed in at offset 4 in the buffer */
147struct reg_vpa {
148 u32 dummy;
149 union {
150 u16 hword;
151 u32 word;
152 } length;
153};
154
155static int vpa_is_registered(struct kvmppc_vpa *vpap)
156{
157 if (vpap->update_pending)
158 return vpap->next_gpa != 0;
159 return vpap->pinned_addr != NULL;
160}
161
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000162static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
163 unsigned long flags,
164 unsigned long vcpuid, unsigned long vpa)
165{
166 struct kvm *kvm = vcpu->kvm;
Paul Mackerras93e60242011-12-12 12:28:55 +0000167 unsigned long len, nb;
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000168 void *va;
169 struct kvm_vcpu *tvcpu;
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000170 int err;
171 int subfunc;
172 struct kvmppc_vpa *vpap;
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000173
174 tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
175 if (!tvcpu)
176 return H_PARAMETER;
177
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000178 subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK;
179 if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL ||
180 subfunc == H_VPA_REG_SLB) {
181 /* Registering new area - address must be cache-line aligned */
182 if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa)
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000183 return H_PARAMETER;
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000184
185 /* convert logical addr to kernel addr and read length */
Paul Mackerras93e60242011-12-12 12:28:55 +0000186 va = kvmppc_pin_guest_page(kvm, vpa, &nb);
187 if (va == NULL)
Paul Mackerrasb2b2f162011-12-12 12:28:21 +0000188 return H_PARAMETER;
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000189 if (subfunc == H_VPA_REG_VPA)
190 len = ((struct reg_vpa *)va)->length.hword;
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000191 else
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000192 len = ((struct reg_vpa *)va)->length.word;
193 kvmppc_unpin_guest_page(kvm, va);
194
195 /* Check length */
196 if (len > nb || len < sizeof(struct reg_vpa))
197 return H_PARAMETER;
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000198 } else {
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000199 vpa = 0;
200 len = 0;
201 }
202
203 err = H_PARAMETER;
204 vpap = NULL;
205 spin_lock(&tvcpu->arch.vpa_update_lock);
206
207 switch (subfunc) {
208 case H_VPA_REG_VPA: /* register VPA */
209 if (len < sizeof(struct lppaca))
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000210 break;
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000211 vpap = &tvcpu->arch.vpa;
212 err = 0;
213 break;
214
215 case H_VPA_REG_DTL: /* register DTL */
216 if (len < sizeof(struct dtl_entry))
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000217 break;
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000218 len -= len % sizeof(struct dtl_entry);
219
220 /* Check that they have previously registered a VPA */
221 err = H_RESOURCE;
222 if (!vpa_is_registered(&tvcpu->arch.vpa))
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000223 break;
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000224
225 vpap = &tvcpu->arch.dtl;
226 err = 0;
227 break;
228
229 case H_VPA_REG_SLB: /* register SLB shadow buffer */
230 /* Check that they have previously registered a VPA */
231 err = H_RESOURCE;
232 if (!vpa_is_registered(&tvcpu->arch.vpa))
233 break;
234
235 vpap = &tvcpu->arch.slb_shadow;
236 err = 0;
237 break;
238
239 case H_VPA_DEREG_VPA: /* deregister VPA */
240 /* Check they don't still have a DTL or SLB buf registered */
241 err = H_RESOURCE;
242 if (vpa_is_registered(&tvcpu->arch.dtl) ||
243 vpa_is_registered(&tvcpu->arch.slb_shadow))
244 break;
245
246 vpap = &tvcpu->arch.vpa;
247 err = 0;
248 break;
249
250 case H_VPA_DEREG_DTL: /* deregister DTL */
251 vpap = &tvcpu->arch.dtl;
252 err = 0;
253 break;
254
255 case H_VPA_DEREG_SLB: /* deregister SLB shadow buffer */
256 vpap = &tvcpu->arch.slb_shadow;
257 err = 0;
258 break;
259 }
260
261 if (vpap) {
262 vpap->next_gpa = vpa;
263 vpap->len = len;
264 vpap->update_pending = 1;
265 }
266
267 spin_unlock(&tvcpu->arch.vpa_update_lock);
268
269 return err;
270}
271
Paul Mackerras081f3232012-06-01 20:20:24 +1000272static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000273{
Paul Mackerras081f3232012-06-01 20:20:24 +1000274 struct kvm *kvm = vcpu->kvm;
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000275 void *va;
276 unsigned long nb;
Paul Mackerras081f3232012-06-01 20:20:24 +1000277 unsigned long gpa;
278
279 /*
280 * We need to pin the page pointed to by vpap->next_gpa,
281 * but we can't call kvmppc_pin_guest_page under the lock
282 * as it does get_user_pages() and down_read(). So we
283 * have to drop the lock, pin the page, then get the lock
284 * again and check that a new area didn't get registered
285 * in the meantime.
286 */
287 for (;;) {
288 gpa = vpap->next_gpa;
289 spin_unlock(&vcpu->arch.vpa_update_lock);
290 va = NULL;
291 nb = 0;
292 if (gpa)
293 va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
294 spin_lock(&vcpu->arch.vpa_update_lock);
295 if (gpa == vpap->next_gpa)
296 break;
297 /* sigh... unpin that one and try again */
298 if (va)
299 kvmppc_unpin_guest_page(kvm, va);
300 }
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000301
302 vpap->update_pending = 0;
Paul Mackerras081f3232012-06-01 20:20:24 +1000303 if (va && nb < vpap->len) {
304 /*
305 * If it's now too short, it must be that userspace
306 * has changed the mappings underlying guest memory,
307 * so unregister the region.
308 */
309 kvmppc_unpin_guest_page(kvm, va);
310 va = NULL;
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000311 }
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000312 if (vpap->pinned_addr)
313 kvmppc_unpin_guest_page(kvm, vpap->pinned_addr);
314 vpap->pinned_addr = va;
315 if (va)
316 vpap->pinned_end = va + vpap->len;
317}
Paul Mackerras93e60242011-12-12 12:28:55 +0000318
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000319static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
320{
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000321 spin_lock(&vcpu->arch.vpa_update_lock);
322 if (vcpu->arch.vpa.update_pending) {
Paul Mackerras081f3232012-06-01 20:20:24 +1000323 kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000324 init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
325 }
326 if (vcpu->arch.dtl.update_pending) {
Paul Mackerras081f3232012-06-01 20:20:24 +1000327 kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000328 vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
329 vcpu->arch.dtl_index = 0;
330 }
331 if (vcpu->arch.slb_shadow.update_pending)
Paul Mackerras081f3232012-06-01 20:20:24 +1000332 kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow);
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000333 spin_unlock(&vcpu->arch.vpa_update_lock);
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000334}
335
Paul Mackerras0456ec42012-02-03 00:56:21 +0000336static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
337 struct kvmppc_vcore *vc)
338{
339 struct dtl_entry *dt;
340 struct lppaca *vpa;
341 unsigned long old_stolen;
342
343 dt = vcpu->arch.dtl_ptr;
344 vpa = vcpu->arch.vpa.pinned_addr;
345 old_stolen = vcpu->arch.stolen_logged;
346 vcpu->arch.stolen_logged = vc->stolen_tb;
347 if (!dt || !vpa)
348 return;
349 memset(dt, 0, sizeof(struct dtl_entry));
350 dt->dispatch_reason = 7;
351 dt->processor_id = vc->pcpu + vcpu->arch.ptid;
352 dt->timebase = mftb();
353 dt->enqueue_to_dispatch_time = vc->stolen_tb - old_stolen;
354 dt->srr0 = kvmppc_get_pc(vcpu);
355 dt->srr1 = vcpu->arch.shregs.msr;
356 ++dt;
357 if (dt == vcpu->arch.dtl.pinned_end)
358 dt = vcpu->arch.dtl.pinned_addr;
359 vcpu->arch.dtl_ptr = dt;
360 /* order writing *dt vs. writing vpa->dtl_idx */
361 smp_wmb();
362 vpa->dtl_idx = ++vcpu->arch.dtl_index;
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000363}
364
365int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
366{
367 unsigned long req = kvmppc_get_gpr(vcpu, 3);
368 unsigned long target, ret = H_SUCCESS;
369 struct kvm_vcpu *tvcpu;
Paul Mackerras2c9097e2012-09-11 13:27:01 +0000370 int idx;
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000371
372 switch (req) {
Paul Mackerrasc77162d2011-12-12 12:31:00 +0000373 case H_ENTER:
Paul Mackerras2c9097e2012-09-11 13:27:01 +0000374 idx = srcu_read_lock(&vcpu->kvm->srcu);
Paul Mackerrasc77162d2011-12-12 12:31:00 +0000375 ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
376 kvmppc_get_gpr(vcpu, 5),
377 kvmppc_get_gpr(vcpu, 6),
378 kvmppc_get_gpr(vcpu, 7));
Paul Mackerras2c9097e2012-09-11 13:27:01 +0000379 srcu_read_unlock(&vcpu->kvm->srcu, idx);
Paul Mackerrasc77162d2011-12-12 12:31:00 +0000380 break;
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000381 case H_CEDE:
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000382 break;
383 case H_PROD:
384 target = kvmppc_get_gpr(vcpu, 4);
385 tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
386 if (!tvcpu) {
387 ret = H_PARAMETER;
388 break;
389 }
390 tvcpu->arch.prodded = 1;
391 smp_mb();
392 if (vcpu->arch.ceded) {
393 if (waitqueue_active(&vcpu->wq)) {
394 wake_up_interruptible(&vcpu->wq);
395 vcpu->stat.halt_wakeup++;
396 }
397 }
398 break;
399 case H_CONFER:
400 break;
401 case H_REGISTER_VPA:
402 ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
403 kvmppc_get_gpr(vcpu, 5),
404 kvmppc_get_gpr(vcpu, 6));
405 break;
406 default:
407 return RESUME_HOST;
408 }
409 kvmppc_set_gpr(vcpu, 3, ret);
410 vcpu->arch.hcall_needed = 0;
411 return RESUME_GUEST;
412}
413
Paul Mackerrasde56a942011-06-29 00:21:34 +0000414static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
415 struct task_struct *tsk)
416{
417 int r = RESUME_HOST;
Paul Mackerras2c9097e2012-09-11 13:27:01 +0000418 int srcu_idx;
Paul Mackerrasde56a942011-06-29 00:21:34 +0000419
420 vcpu->stat.sum_exits++;
421
422 run->exit_reason = KVM_EXIT_UNKNOWN;
423 run->ready_for_interrupt_injection = 1;
424 switch (vcpu->arch.trap) {
425 /* We're good on these - the host merely wanted to get our attention */
426 case BOOK3S_INTERRUPT_HV_DECREMENTER:
427 vcpu->stat.dec_exits++;
428 r = RESUME_GUEST;
429 break;
430 case BOOK3S_INTERRUPT_EXTERNAL:
431 vcpu->stat.ext_intr_exits++;
432 r = RESUME_GUEST;
433 break;
434 case BOOK3S_INTERRUPT_PERFMON:
435 r = RESUME_GUEST;
436 break;
437 case BOOK3S_INTERRUPT_PROGRAM:
438 {
439 ulong flags;
440 /*
441 * Normally program interrupts are delivered directly
442 * to the guest by the hardware, but we can get here
443 * as a result of a hypervisor emulation interrupt
444 * (e40) getting turned into a 700 by BML RTAS.
445 */
446 flags = vcpu->arch.shregs.msr & 0x1f0000ull;
447 kvmppc_core_queue_program(vcpu, flags);
448 r = RESUME_GUEST;
449 break;
450 }
451 case BOOK3S_INTERRUPT_SYSCALL:
452 {
453 /* hcall - punt to userspace */
454 int i;
455
456 if (vcpu->arch.shregs.msr & MSR_PR) {
457 /* sc 1 from userspace - reflect to guest syscall */
458 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_SYSCALL);
459 r = RESUME_GUEST;
460 break;
461 }
462 run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
463 for (i = 0; i < 9; ++i)
464 run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
465 run->exit_reason = KVM_EXIT_PAPR_HCALL;
466 vcpu->arch.hcall_needed = 1;
467 r = RESUME_HOST;
468 break;
469 }
470 /*
Paul Mackerras342d3db2011-12-12 12:38:05 +0000471 * We get these next two if the guest accesses a page which it thinks
472 * it has mapped but which is not actually present, either because
473 * it is for an emulated I/O device or because the corresonding
474 * host page has been paged out. Any other HDSI/HISI interrupts
475 * have been handled already.
Paul Mackerrasde56a942011-06-29 00:21:34 +0000476 */
477 case BOOK3S_INTERRUPT_H_DATA_STORAGE:
Paul Mackerras2c9097e2012-09-11 13:27:01 +0000478 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
Paul Mackerras697d3892011-12-12 12:36:37 +0000479 r = kvmppc_book3s_hv_page_fault(run, vcpu,
480 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
Paul Mackerras2c9097e2012-09-11 13:27:01 +0000481 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
Paul Mackerrasde56a942011-06-29 00:21:34 +0000482 break;
483 case BOOK3S_INTERRUPT_H_INST_STORAGE:
Paul Mackerras2c9097e2012-09-11 13:27:01 +0000484 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
Paul Mackerras342d3db2011-12-12 12:38:05 +0000485 r = kvmppc_book3s_hv_page_fault(run, vcpu,
486 kvmppc_get_pc(vcpu), 0);
Paul Mackerras2c9097e2012-09-11 13:27:01 +0000487 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
Paul Mackerrasde56a942011-06-29 00:21:34 +0000488 break;
489 /*
490 * This occurs if the guest executes an illegal instruction.
491 * We just generate a program interrupt to the guest, since
492 * we don't emulate any guest instructions at this stage.
493 */
494 case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
495 kvmppc_core_queue_program(vcpu, 0x80000);
496 r = RESUME_GUEST;
497 break;
498 default:
499 kvmppc_dump_regs(vcpu);
500 printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
501 vcpu->arch.trap, kvmppc_get_pc(vcpu),
502 vcpu->arch.shregs.msr);
503 r = RESUME_HOST;
504 BUG();
505 break;
506 }
507
Paul Mackerrasde56a942011-06-29 00:21:34 +0000508 return r;
509}
510
511int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
512 struct kvm_sregs *sregs)
513{
514 int i;
515
516 sregs->pvr = vcpu->arch.pvr;
517
518 memset(sregs, 0, sizeof(struct kvm_sregs));
519 for (i = 0; i < vcpu->arch.slb_max; i++) {
520 sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
521 sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
522 }
523
524 return 0;
525}
526
527int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
528 struct kvm_sregs *sregs)
529{
530 int i, j;
531
532 kvmppc_set_pvr(vcpu, sregs->pvr);
533
534 j = 0;
535 for (i = 0; i < vcpu->arch.slb_nr; i++) {
536 if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) {
537 vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe;
538 vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv;
539 ++j;
540 }
541 }
542 vcpu->arch.slb_max = j;
543
544 return 0;
545}
546
Paul Mackerrasa136a8b2012-09-25 20:31:56 +0000547int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
Paul Mackerras31f34382011-12-12 12:26:50 +0000548{
Paul Mackerrasa136a8b2012-09-25 20:31:56 +0000549 int r = 0;
550 long int i;
Paul Mackerras31f34382011-12-12 12:26:50 +0000551
Paul Mackerrasa136a8b2012-09-25 20:31:56 +0000552 switch (id) {
Paul Mackerras31f34382011-12-12 12:26:50 +0000553 case KVM_REG_PPC_HIOR:
Paul Mackerrasa136a8b2012-09-25 20:31:56 +0000554 *val = get_reg_val(id, 0);
555 break;
556 case KVM_REG_PPC_DABR:
557 *val = get_reg_val(id, vcpu->arch.dabr);
558 break;
559 case KVM_REG_PPC_DSCR:
560 *val = get_reg_val(id, vcpu->arch.dscr);
561 break;
562 case KVM_REG_PPC_PURR:
563 *val = get_reg_val(id, vcpu->arch.purr);
564 break;
565 case KVM_REG_PPC_SPURR:
566 *val = get_reg_val(id, vcpu->arch.spurr);
567 break;
568 case KVM_REG_PPC_AMR:
569 *val = get_reg_val(id, vcpu->arch.amr);
570 break;
571 case KVM_REG_PPC_UAMOR:
572 *val = get_reg_val(id, vcpu->arch.uamor);
573 break;
574 case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA:
575 i = id - KVM_REG_PPC_MMCR0;
576 *val = get_reg_val(id, vcpu->arch.mmcr[i]);
577 break;
578 case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
579 i = id - KVM_REG_PPC_PMC1;
580 *val = get_reg_val(id, vcpu->arch.pmc[i]);
Paul Mackerras31f34382011-12-12 12:26:50 +0000581 break;
582 default:
Paul Mackerrasa136a8b2012-09-25 20:31:56 +0000583 r = -EINVAL;
Paul Mackerras31f34382011-12-12 12:26:50 +0000584 break;
585 }
586
587 return r;
588}
589
Paul Mackerrasa136a8b2012-09-25 20:31:56 +0000590int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
Paul Mackerras31f34382011-12-12 12:26:50 +0000591{
Paul Mackerrasa136a8b2012-09-25 20:31:56 +0000592 int r = 0;
593 long int i;
Paul Mackerras31f34382011-12-12 12:26:50 +0000594
Paul Mackerrasa136a8b2012-09-25 20:31:56 +0000595 switch (id) {
Paul Mackerras31f34382011-12-12 12:26:50 +0000596 case KVM_REG_PPC_HIOR:
Paul Mackerras31f34382011-12-12 12:26:50 +0000597 /* Only allow this to be set to zero */
Paul Mackerrasa136a8b2012-09-25 20:31:56 +0000598 if (set_reg_val(id, *val))
Paul Mackerras31f34382011-12-12 12:26:50 +0000599 r = -EINVAL;
600 break;
Paul Mackerrasa136a8b2012-09-25 20:31:56 +0000601 case KVM_REG_PPC_DABR:
602 vcpu->arch.dabr = set_reg_val(id, *val);
603 break;
604 case KVM_REG_PPC_DSCR:
605 vcpu->arch.dscr = set_reg_val(id, *val);
606 break;
607 case KVM_REG_PPC_PURR:
608 vcpu->arch.purr = set_reg_val(id, *val);
609 break;
610 case KVM_REG_PPC_SPURR:
611 vcpu->arch.spurr = set_reg_val(id, *val);
612 break;
613 case KVM_REG_PPC_AMR:
614 vcpu->arch.amr = set_reg_val(id, *val);
615 break;
616 case KVM_REG_PPC_UAMOR:
617 vcpu->arch.uamor = set_reg_val(id, *val);
618 break;
619 case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA:
620 i = id - KVM_REG_PPC_MMCR0;
621 vcpu->arch.mmcr[i] = set_reg_val(id, *val);
622 break;
623 case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
624 i = id - KVM_REG_PPC_PMC1;
625 vcpu->arch.pmc[i] = set_reg_val(id, *val);
626 break;
Paul Mackerras31f34382011-12-12 12:26:50 +0000627 default:
Paul Mackerrasa136a8b2012-09-25 20:31:56 +0000628 r = -EINVAL;
Paul Mackerras31f34382011-12-12 12:26:50 +0000629 break;
630 }
631
632 return r;
633}
634
Paul Mackerrasde56a942011-06-29 00:21:34 +0000635int kvmppc_core_check_processor_compat(void)
636{
Paul Mackerras9e368f22011-06-29 00:40:08 +0000637 if (cpu_has_feature(CPU_FTR_HVMODE))
Paul Mackerrasde56a942011-06-29 00:21:34 +0000638 return 0;
639 return -EIO;
640}
641
642struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
643{
644 struct kvm_vcpu *vcpu;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000645 int err = -EINVAL;
646 int core;
647 struct kvmppc_vcore *vcore;
Paul Mackerrasde56a942011-06-29 00:21:34 +0000648
Paul Mackerras371fefd2011-06-29 00:23:08 +0000649 core = id / threads_per_core;
650 if (core >= KVM_MAX_VCORES)
651 goto out;
652
653 err = -ENOMEM;
Sasha Levin6b75e6b2011-12-07 10:24:56 +0200654 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
Paul Mackerrasde56a942011-06-29 00:21:34 +0000655 if (!vcpu)
656 goto out;
657
658 err = kvm_vcpu_init(vcpu, kvm, id);
659 if (err)
660 goto free_vcpu;
661
662 vcpu->arch.shared = &vcpu->arch.shregs;
663 vcpu->arch.last_cpu = -1;
664 vcpu->arch.mmcr[0] = MMCR0_FC;
665 vcpu->arch.ctrl = CTRL_RUNLATCH;
666 /* default to host PVR, since we can't spoof it */
667 vcpu->arch.pvr = mfspr(SPRN_PVR);
668 kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000669 spin_lock_init(&vcpu->arch.vpa_update_lock);
Paul Mackerrasde56a942011-06-29 00:21:34 +0000670
Paul Mackerrasde56a942011-06-29 00:21:34 +0000671 kvmppc_mmu_book3s_hv_init(vcpu);
672
Paul Mackerras371fefd2011-06-29 00:23:08 +0000673 /*
Paul Mackerras19ccb762011-07-23 17:42:46 +1000674 * We consider the vcpu stopped until we see the first run ioctl for it.
Paul Mackerras371fefd2011-06-29 00:23:08 +0000675 */
Paul Mackerras19ccb762011-07-23 17:42:46 +1000676 vcpu->arch.state = KVMPPC_VCPU_STOPPED;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000677
678 init_waitqueue_head(&vcpu->arch.cpu_run);
679
680 mutex_lock(&kvm->lock);
681 vcore = kvm->arch.vcores[core];
682 if (!vcore) {
683 vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
684 if (vcore) {
685 INIT_LIST_HEAD(&vcore->runnable_threads);
686 spin_lock_init(&vcore->lock);
Paul Mackerras19ccb762011-07-23 17:42:46 +1000687 init_waitqueue_head(&vcore->wq);
Paul Mackerras0456ec42012-02-03 00:56:21 +0000688 vcore->preempt_tb = mftb();
Paul Mackerras371fefd2011-06-29 00:23:08 +0000689 }
690 kvm->arch.vcores[core] = vcore;
691 }
692 mutex_unlock(&kvm->lock);
693
694 if (!vcore)
695 goto free_vcpu;
696
697 spin_lock(&vcore->lock);
698 ++vcore->num_threads;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000699 spin_unlock(&vcore->lock);
700 vcpu->arch.vcore = vcore;
Paul Mackerras0456ec42012-02-03 00:56:21 +0000701 vcpu->arch.stolen_logged = vcore->stolen_tb;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000702
Alexander Grafaf8f38b2011-08-10 13:57:08 +0200703 vcpu->arch.cpu_type = KVM_CPU_3S_64;
704 kvmppc_sanity_check(vcpu);
705
Paul Mackerrasde56a942011-06-29 00:21:34 +0000706 return vcpu;
707
708free_vcpu:
Sasha Levin6b75e6b2011-12-07 10:24:56 +0200709 kmem_cache_free(kvm_vcpu_cache, vcpu);
Paul Mackerrasde56a942011-06-29 00:21:34 +0000710out:
711 return ERR_PTR(err);
712}
713
714void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
715{
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000716 spin_lock(&vcpu->arch.vpa_update_lock);
717 if (vcpu->arch.dtl.pinned_addr)
718 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr);
719 if (vcpu->arch.slb_shadow.pinned_addr)
720 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr);
721 if (vcpu->arch.vpa.pinned_addr)
722 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
723 spin_unlock(&vcpu->arch.vpa_update_lock);
Paul Mackerrasde56a942011-06-29 00:21:34 +0000724 kvm_vcpu_uninit(vcpu);
Sasha Levin6b75e6b2011-12-07 10:24:56 +0200725 kmem_cache_free(kvm_vcpu_cache, vcpu);
Paul Mackerrasde56a942011-06-29 00:21:34 +0000726}
727
Paul Mackerras19ccb762011-07-23 17:42:46 +1000728static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
Paul Mackerrasde56a942011-06-29 00:21:34 +0000729{
Paul Mackerras19ccb762011-07-23 17:42:46 +1000730 unsigned long dec_nsec, now;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000731
Paul Mackerras19ccb762011-07-23 17:42:46 +1000732 now = get_tb();
733 if (now > vcpu->arch.dec_expires) {
734 /* decrementer has already gone negative */
735 kvmppc_core_queue_dec(vcpu);
Scott Wood7e28e60e2011-11-08 18:23:20 -0600736 kvmppc_core_prepare_to_enter(vcpu);
Paul Mackerras19ccb762011-07-23 17:42:46 +1000737 return;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000738 }
Paul Mackerras19ccb762011-07-23 17:42:46 +1000739 dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
740 / tb_ticks_per_sec;
741 hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
742 HRTIMER_MODE_REL);
743 vcpu->arch.timer_running = 1;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000744}
745
Paul Mackerras19ccb762011-07-23 17:42:46 +1000746static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
Paul Mackerras371fefd2011-06-29 00:23:08 +0000747{
Paul Mackerras19ccb762011-07-23 17:42:46 +1000748 vcpu->arch.ceded = 0;
749 if (vcpu->arch.timer_running) {
750 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
751 vcpu->arch.timer_running = 0;
752 }
Paul Mackerras371fefd2011-06-29 00:23:08 +0000753}
754
755extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
756extern void xics_wake_cpu(int cpu);
757
758static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
759 struct kvm_vcpu *vcpu)
760{
Paul Mackerras371fefd2011-06-29 00:23:08 +0000761 if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
762 return;
763 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
764 --vc->n_runnable;
Paul Mackerras19ccb762011-07-23 17:42:46 +1000765 ++vc->n_busy;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000766 list_del(&vcpu->arch.run_list);
767}
768
Paul Mackerrasf0888f72012-02-03 00:54:17 +0000769static int kvmppc_grab_hwthread(int cpu)
770{
771 struct paca_struct *tpaca;
772 long timeout = 1000;
773
774 tpaca = &paca[cpu];
775
776 /* Ensure the thread won't go into the kernel if it wakes */
777 tpaca->kvm_hstate.hwthread_req = 1;
778
779 /*
780 * If the thread is already executing in the kernel (e.g. handling
781 * a stray interrupt), wait for it to get back to nap mode.
782 * The smp_mb() is to ensure that our setting of hwthread_req
783 * is visible before we look at hwthread_state, so if this
784 * races with the code at system_reset_pSeries and the thread
785 * misses our setting of hwthread_req, we are sure to see its
786 * setting of hwthread_state, and vice versa.
787 */
788 smp_mb();
789 while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) {
790 if (--timeout <= 0) {
791 pr_err("KVM: couldn't grab cpu %d\n", cpu);
792 return -EBUSY;
793 }
794 udelay(1);
795 }
796 return 0;
797}
798
799static void kvmppc_release_hwthread(int cpu)
800{
801 struct paca_struct *tpaca;
802
803 tpaca = &paca[cpu];
804 tpaca->kvm_hstate.hwthread_req = 0;
805 tpaca->kvm_hstate.kvm_vcpu = NULL;
806}
807
Paul Mackerras371fefd2011-06-29 00:23:08 +0000808static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
809{
810 int cpu;
811 struct paca_struct *tpaca;
812 struct kvmppc_vcore *vc = vcpu->arch.vcore;
813
Paul Mackerras19ccb762011-07-23 17:42:46 +1000814 if (vcpu->arch.timer_running) {
815 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
816 vcpu->arch.timer_running = 0;
817 }
Paul Mackerras371fefd2011-06-29 00:23:08 +0000818 cpu = vc->pcpu + vcpu->arch.ptid;
819 tpaca = &paca[cpu];
820 tpaca->kvm_hstate.kvm_vcpu = vcpu;
821 tpaca->kvm_hstate.kvm_vcore = vc;
Paul Mackerras19ccb762011-07-23 17:42:46 +1000822 tpaca->kvm_hstate.napping = 0;
823 vcpu->cpu = vc->pcpu;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000824 smp_wmb();
Michael Neuling251da032011-11-10 16:03:20 +0000825#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
Paul Mackerras371fefd2011-06-29 00:23:08 +0000826 if (vcpu->arch.ptid) {
Paul Mackerrasf0888f72012-02-03 00:54:17 +0000827 kvmppc_grab_hwthread(cpu);
Paul Mackerras371fefd2011-06-29 00:23:08 +0000828 xics_wake_cpu(cpu);
829 ++vc->n_woken;
830 }
831#endif
832}
833
834static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
835{
836 int i;
837
838 HMT_low();
839 i = 0;
840 while (vc->nap_count < vc->n_woken) {
841 if (++i >= 1000000) {
842 pr_err("kvmppc_wait_for_nap timeout %d %d\n",
843 vc->nap_count, vc->n_woken);
844 break;
845 }
846 cpu_relax();
847 }
848 HMT_medium();
849}
850
851/*
852 * Check that we are on thread 0 and that any other threads in
853 * this core are off-line.
854 */
855static int on_primary_thread(void)
856{
857 int cpu = smp_processor_id();
858 int thr = cpu_thread_in_core(cpu);
859
860 if (thr)
861 return 0;
862 while (++thr < threads_per_core)
863 if (cpu_online(cpu + thr))
864 return 0;
865 return 1;
866}
867
868/*
869 * Run a set of guest threads on a physical core.
870 * Called with vc->lock held.
871 */
872static int kvmppc_run_core(struct kvmppc_vcore *vc)
873{
Paul Mackerras19ccb762011-07-23 17:42:46 +1000874 struct kvm_vcpu *vcpu, *vcpu0, *vnext;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000875 long ret;
Paul Mackerrasde56a942011-06-29 00:21:34 +0000876 u64 now;
Paul Mackerras081f3232012-06-01 20:20:24 +1000877 int ptid, i, need_vpa_update;
Paul Mackerras2c9097e2012-09-11 13:27:01 +0000878 int srcu_idx;
Paul Mackerrasde56a942011-06-29 00:21:34 +0000879
Paul Mackerras371fefd2011-06-29 00:23:08 +0000880 /* don't start if any threads have a signal pending */
Paul Mackerras081f3232012-06-01 20:20:24 +1000881 need_vpa_update = 0;
882 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
Paul Mackerras371fefd2011-06-29 00:23:08 +0000883 if (signal_pending(vcpu->arch.run_task))
884 return 0;
Paul Mackerras081f3232012-06-01 20:20:24 +1000885 need_vpa_update |= vcpu->arch.vpa.update_pending |
886 vcpu->arch.slb_shadow.update_pending |
887 vcpu->arch.dtl.update_pending;
888 }
889
890 /*
891 * Initialize *vc, in particular vc->vcore_state, so we can
892 * drop the vcore lock if necessary.
893 */
894 vc->n_woken = 0;
895 vc->nap_count = 0;
896 vc->entry_exit_count = 0;
897 vc->vcore_state = VCORE_RUNNING;
898 vc->in_guest = 0;
899 vc->napping_threads = 0;
900
901 /*
902 * Updating any of the vpas requires calling kvmppc_pin_guest_page,
903 * which can't be called with any spinlocks held.
904 */
905 if (need_vpa_update) {
906 spin_unlock(&vc->lock);
907 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
908 kvmppc_update_vpas(vcpu);
909 spin_lock(&vc->lock);
910 }
Paul Mackerrasde56a942011-06-29 00:21:34 +0000911
912 /*
913 * Make sure we are running on thread 0, and that
914 * secondary threads are offline.
915 * XXX we should also block attempts to bring any
916 * secondary threads online.
917 */
Paul Mackerras371fefd2011-06-29 00:23:08 +0000918 if (threads_per_core > 1 && !on_primary_thread()) {
919 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
920 vcpu->arch.ret = -EBUSY;
921 goto out;
Paul Mackerrasde56a942011-06-29 00:21:34 +0000922 }
923
Paul Mackerras19ccb762011-07-23 17:42:46 +1000924 /*
925 * Assign physical thread IDs, first to non-ceded vcpus
926 * and then to ceded ones.
927 */
928 ptid = 0;
929 vcpu0 = NULL;
930 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
931 if (!vcpu->arch.ceded) {
932 if (!ptid)
933 vcpu0 = vcpu;
934 vcpu->arch.ptid = ptid++;
935 }
936 }
937 if (!vcpu0)
938 return 0; /* nothing to run */
939 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
940 if (vcpu->arch.ceded)
941 vcpu->arch.ptid = ptid++;
942
Paul Mackerras0456ec42012-02-03 00:56:21 +0000943 vc->stolen_tb += mftb() - vc->preempt_tb;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000944 vc->pcpu = smp_processor_id();
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000945 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
Paul Mackerras371fefd2011-06-29 00:23:08 +0000946 kvmppc_start_thread(vcpu);
Paul Mackerras0456ec42012-02-03 00:56:21 +0000947 kvmppc_create_dtl_entry(vcpu, vc);
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000948 }
Paul Mackerrasf0888f72012-02-03 00:54:17 +0000949 /* Grab any remaining hw threads so they can't go into the kernel */
950 for (i = ptid; i < threads_per_core; ++i)
951 kvmppc_grab_hwthread(vc->pcpu + i);
Paul Mackerras371fefd2011-06-29 00:23:08 +0000952
953 preempt_disable();
Paul Mackerras19ccb762011-07-23 17:42:46 +1000954 spin_unlock(&vc->lock);
Paul Mackerrasde56a942011-06-29 00:21:34 +0000955
Paul Mackerras19ccb762011-07-23 17:42:46 +1000956 kvm_guest_enter();
Paul Mackerras2c9097e2012-09-11 13:27:01 +0000957
958 srcu_idx = srcu_read_lock(&vcpu0->kvm->srcu);
959
Paul Mackerras19ccb762011-07-23 17:42:46 +1000960 __kvmppc_vcore_entry(NULL, vcpu0);
Paul Mackerrasf0888f72012-02-03 00:54:17 +0000961 for (i = 0; i < threads_per_core; ++i)
962 kvmppc_release_hwthread(vc->pcpu + i);
Paul Mackerras19ccb762011-07-23 17:42:46 +1000963
Paul Mackerras371fefd2011-06-29 00:23:08 +0000964 spin_lock(&vc->lock);
Paul Mackerras19ccb762011-07-23 17:42:46 +1000965 /* disable sending of IPIs on virtual external irqs */
966 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
967 vcpu->cpu = -1;
968 /* wait for secondary threads to finish writing their state to memory */
Paul Mackerras371fefd2011-06-29 00:23:08 +0000969 if (vc->nap_count < vc->n_woken)
970 kvmppc_wait_for_nap(vc);
971 /* prevent other vcpu threads from doing kvmppc_start_thread() now */
Paul Mackerras19ccb762011-07-23 17:42:46 +1000972 vc->vcore_state = VCORE_EXITING;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000973 spin_unlock(&vc->lock);
974
Paul Mackerras2c9097e2012-09-11 13:27:01 +0000975 srcu_read_unlock(&vcpu0->kvm->srcu, srcu_idx);
976
Paul Mackerras371fefd2011-06-29 00:23:08 +0000977 /* make sure updates to secondary vcpu structs are visible now */
978 smp_mb();
Paul Mackerrasde56a942011-06-29 00:21:34 +0000979 kvm_guest_exit();
980
981 preempt_enable();
982 kvm_resched(vcpu);
983
984 now = get_tb();
Paul Mackerras371fefd2011-06-29 00:23:08 +0000985 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
986 /* cancel pending dec exception if dec is positive */
987 if (now < vcpu->arch.dec_expires &&
988 kvmppc_core_pending_dec(vcpu))
989 kvmppc_core_dequeue_dec(vcpu);
Paul Mackerras19ccb762011-07-23 17:42:46 +1000990
991 ret = RESUME_GUEST;
992 if (vcpu->arch.trap)
993 ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
994 vcpu->arch.run_task);
995
Paul Mackerras371fefd2011-06-29 00:23:08 +0000996 vcpu->arch.ret = ret;
997 vcpu->arch.trap = 0;
Paul Mackerras19ccb762011-07-23 17:42:46 +1000998
999 if (vcpu->arch.ceded) {
1000 if (ret != RESUME_GUEST)
1001 kvmppc_end_cede(vcpu);
1002 else
1003 kvmppc_set_timer(vcpu);
1004 }
Paul Mackerras371fefd2011-06-29 00:23:08 +00001005 }
Paul Mackerrasde56a942011-06-29 00:21:34 +00001006
Paul Mackerras371fefd2011-06-29 00:23:08 +00001007 spin_lock(&vc->lock);
Paul Mackerrasde56a942011-06-29 00:21:34 +00001008 out:
Paul Mackerras19ccb762011-07-23 17:42:46 +10001009 vc->vcore_state = VCORE_INACTIVE;
Paul Mackerras0456ec42012-02-03 00:56:21 +00001010 vc->preempt_tb = mftb();
Paul Mackerras371fefd2011-06-29 00:23:08 +00001011 list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
1012 arch.run_list) {
1013 if (vcpu->arch.ret != RESUME_GUEST) {
1014 kvmppc_remove_runnable(vc, vcpu);
1015 wake_up(&vcpu->arch.cpu_run);
1016 }
1017 }
1018
1019 return 1;
1020}
1021
/*
 * Wait for some other vcpu thread to execute us, and
 * wake us up when we need to handle something in the host.
 *
 * Sleeps on vcpu->arch.cpu_run in the given wait_state, but only if the
 * vcpu is still KVMPPC_VCPU_RUNNABLE once it has been queued on the
 * wait queue.
 */
static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
	/* re-check the state after queueing, before actually sleeping */
	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
		schedule();
	finish_wait(&vcpu->arch.cpu_run, &wait);
}
Paul Mackerras371fefd2011-06-29 00:23:08 +00001035
/*
 * All the vcpus in this vcore are idle, so wait for a decrementer
 * or external interrupt to one of the vcpus.  vc->lock is held.
 *
 * The lock is dropped while sleeping and re-taken before returning.
 * The vcore is marked VCORE_SLEEPING for the duration and set back to
 * VCORE_INACTIVE on the way out.
 */
static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
{
	DEFINE_WAIT(wait);
	struct kvm_vcpu *v;
	int all_idle = 1;

	prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
	vc->vcore_state = VCORE_SLEEPING;
	spin_unlock(&vc->lock);
	/*
	 * Only sleep if every runnable vcpu is ceded with nothing pending.
	 * NOTE(review): this walk happens after vc->lock was dropped --
	 * confirm it is safe against concurrent changes to the list.
	 */
	list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
		if (!v->arch.ceded || v->arch.pending_exceptions) {
			all_idle = 0;
			break;
		}
	}
	if (all_idle)
		schedule();
	finish_wait(&vc->wq, &wait);
	spin_lock(&vc->lock);
	vc->vcore_state = VCORE_INACTIVE;
}
1061
/*
 * Make a vcpu runnable within its vcore and stay here until it stops
 * being runnable or the calling task has a signal pending.
 *
 * The calling thread either waits for another vcpu thread to run the
 * vcore, or becomes the runner itself and calls kvmppc_run_core() (or
 * kvmppc_vcore_blocked() when every runnable vcpu is ceded).
 *
 * Returns vcpu->arch.ret, which is set to -EINTR (with exit_reason
 * KVM_EXIT_INTR) when the vcpu is pulled off the runnable list because
 * of a pending signal.
 */
static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
	int n_ceded;
	int prev_state;
	struct kvmppc_vcore *vc;
	struct kvm_vcpu *v, *vn;

	kvm_run->exit_reason = 0;
	vcpu->arch.ret = RESUME_GUEST;
	vcpu->arch.trap = 0;

	/*
	 * Synchronize with other threads in this virtual core
	 */
	vc = vcpu->arch.vcore;
	spin_lock(&vc->lock);
	vcpu->arch.ceded = 0;
	vcpu->arch.run_task = current;
	vcpu->arch.kvm_run = kvm_run;
	prev_state = vcpu->arch.state;
	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
	list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
	++vc->n_runnable;

	/*
	 * This happens the first time this is called for a vcpu.
	 * If the vcore is already running, we may be able to start
	 * this thread straight away and have it join in.
	 */
	if (prev_state == KVMPPC_VCPU_STOPPED) {
		if (vc->vcore_state == VCORE_RUNNING &&
		    VCORE_EXIT_COUNT(vc) == 0) {
			vcpu->arch.ptid = vc->n_runnable - 1;
			kvmppc_start_thread(vcpu);
		}

	} else if (prev_state == KVMPPC_VCPU_BUSY_IN_HOST)
		--vc->n_busy;

	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
	       !signal_pending(current)) {
		/* vcore busy or not idle: wait for whoever is running it */
		if (vc->n_busy || vc->vcore_state != VCORE_INACTIVE) {
			spin_unlock(&vc->lock);
			kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
			spin_lock(&vc->lock);
			continue;
		}
		/* otherwise this thread becomes the runner for the vcore */
		vc->runner = vcpu;
		n_ceded = 0;
		list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
			n_ceded += v->arch.ceded;
		if (n_ceded == vc->n_runnable)
			kvmppc_vcore_blocked(vc);
		else
			kvmppc_run_core(vc);

		/* kick out any vcpu whose task now has a signal pending */
		list_for_each_entry_safe(v, vn, &vc->runnable_threads,
					 arch.run_list) {
			kvmppc_core_prepare_to_enter(v);
			if (signal_pending(v->arch.run_task)) {
				kvmppc_remove_runnable(vc, v);
				v->stat.signal_exits++;
				v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
				v->arch.ret = -EINTR;
				wake_up(&v->arch.cpu_run);
			}
		}
		vc->runner = NULL;
	}

	if (signal_pending(current)) {
		/* if the vcore is running or exiting, wait before removing ourselves */
		if (vc->vcore_state == VCORE_RUNNING ||
		    vc->vcore_state == VCORE_EXITING) {
			spin_unlock(&vc->lock);
			kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
			spin_lock(&vc->lock);
		}
		if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
			kvmppc_remove_runnable(vc, vcpu);
			vcpu->stat.signal_exits++;
			kvm_run->exit_reason = KVM_EXIT_INTR;
			vcpu->arch.ret = -EINTR;
		}
	}

	spin_unlock(&vc->lock);
	return vcpu->arch.ret;
}
1150
/*
 * Entry point for running a vcpu.
 *
 * Rejects a vcpu that is not marked sane (-EINVAL) and returns -EINTR
 * straight away if the caller already has a signal pending.  Otherwise
 * it performs the one-time HTAB/RMA setup if that has not been done yet,
 * then calls kvmppc_run_vcpu() repeatedly, handling PAPR hypercalls made
 * with MSR_PR clear in between, for as long as the result is
 * RESUME_GUEST.
 *
 * kvm->arch.vcpus_running is incremented for the duration of the run.
 */
int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
	int r;

	if (!vcpu->arch.sane) {
		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		return -EINVAL;
	}

	kvmppc_core_prepare_to_enter(vcpu);

	/* No need to go into the guest when all we'll do is come back out */
	if (signal_pending(current)) {
		run->exit_reason = KVM_EXIT_INTR;
		return -EINTR;
	}

	atomic_inc(&vcpu->kvm->arch.vcpus_running);
	/* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
	smp_mb();

	/* On the first time here, set up HTAB and VRMA or RMA */
	if (!vcpu->kvm->arch.rma_setup_done) {
		r = kvmppc_hv_setup_htab_rma(vcpu);
		if (r)
			goto out;
	}

	flush_fp_to_thread(current);
	flush_altivec_to_thread(current);
	flush_vsx_to_thread(current);
	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
	vcpu->arch.pgdir = current->mm->pgd;

	do {
		r = kvmppc_run_vcpu(run, vcpu);

		/* hypercall exit with MSR_PR clear: handle it here */
		if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
		    !(vcpu->arch.shregs.msr & MSR_PR)) {
			r = kvmppc_pseries_do_hcall(vcpu);
			kvmppc_core_prepare_to_enter(vcpu);
		}
	} while (r == RESUME_GUEST);

 out:
	atomic_dec(&vcpu->kvm->arch.vcpus_running);
	return r;
}
1199
David Gibson54738c02011-06-29 00:22:41 +00001200
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001201/* Work out RMLS (real mode limit selector) field value for a given RMA size.
Paul Mackerras9e368f22011-06-29 00:40:08 +00001202 Assumes POWER7 or PPC970. */
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001203static inline int lpcr_rmls(unsigned long rma_size)
1204{
1205 switch (rma_size) {
1206 case 32ul << 20: /* 32 MB */
Paul Mackerras9e368f22011-06-29 00:40:08 +00001207 if (cpu_has_feature(CPU_FTR_ARCH_206))
1208 return 8; /* only supported on POWER7 */
1209 return -1;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001210 case 64ul << 20: /* 64 MB */
1211 return 3;
1212 case 128ul << 20: /* 128 MB */
1213 return 7;
1214 case 256ul << 20: /* 256 MB */
1215 return 4;
1216 case 1ul << 30: /* 1 GB */
1217 return 2;
1218 case 16ul << 30: /* 16 GB */
1219 return 1;
1220 case 256ul << 30: /* 256 GB */
1221 return 0;
1222 default:
1223 return -1;
1224 }
1225}
1226
1227static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1228{
Alexander Grafb4e70612012-01-16 16:50:10 +01001229 struct kvmppc_linear_info *ri = vma->vm_file->private_data;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001230 struct page *page;
1231
1232 if (vmf->pgoff >= ri->npages)
1233 return VM_FAULT_SIGBUS;
1234
1235 page = pfn_to_page(ri->base_pfn + vmf->pgoff);
1236 get_page(page);
1237 vmf->page = page;
1238 return 0;
1239}
1240
/* VMA operations installed by kvm_rma_mmap(); only faults are handled. */
static const struct vm_operations_struct kvm_rma_vm_ops = {
	.fault = kvm_rma_fault,
};
1244
1245static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
1246{
1247 vma->vm_flags |= VM_RESERVED;
1248 vma->vm_ops = &kvm_rma_vm_ops;
1249 return 0;
1250}
1251
1252static int kvm_rma_release(struct inode *inode, struct file *filp)
1253{
Alexander Grafb4e70612012-01-16 16:50:10 +01001254 struct kvmppc_linear_info *ri = filp->private_data;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001255
1256 kvm_release_rma(ri);
1257 return 0;
1258}
1259
/*
 * File operations for the anonymous "kvm-rma" file.  The address of this
 * table is also used to recognise a vma as one of our preallocated RMAs.
 */
static struct file_operations kvm_rma_fops = {
	.mmap           = kvm_rma_mmap,
	.release	= kvm_rma_release,
};
1264
1265long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
1266{
Alexander Grafb4e70612012-01-16 16:50:10 +01001267 struct kvmppc_linear_info *ri;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001268 long fd;
1269
1270 ri = kvm_alloc_rma();
1271 if (!ri)
1272 return -ENOMEM;
1273
1274 fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR);
1275 if (fd < 0)
1276 kvm_release_rma(ri);
1277
1278 ret->rma_size = ri->npages << PAGE_SHIFT;
1279 return fd;
1280}
1281
Benjamin Herrenschmidt5b747162012-04-26 19:43:42 +00001282static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
1283 int linux_psize)
1284{
1285 struct mmu_psize_def *def = &mmu_psize_defs[linux_psize];
1286
1287 if (!def->shift)
1288 return;
1289 (*sps)->page_shift = def->shift;
1290 (*sps)->slb_enc = def->sllp;
1291 (*sps)->enc[0].page_shift = def->shift;
1292 (*sps)->enc[0].pte_enc = def->penc;
1293 (*sps)++;
1294}
1295
1296int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
1297{
1298 struct kvm_ppc_one_seg_page_size *sps;
1299
1300 info->flags = KVM_PPC_PAGE_SIZES_REAL;
1301 if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
1302 info->flags |= KVM_PPC_1T_SEGMENTS;
1303 info->slb_size = mmu_slb_size;
1304
1305 /* We only support these sizes for now, and no muti-size segments */
1306 sps = &info->sps[0];
1307 kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K);
1308 kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K);
1309 kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M);
1310
1311 return 0;
1312}
1313
Paul Mackerras82ed3612011-12-15 02:03:22 +00001314/*
1315 * Get (and clear) the dirty memory log for a memory slot.
1316 */
1317int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
1318{
1319 struct kvm_memory_slot *memslot;
1320 int r;
1321 unsigned long n;
1322
1323 mutex_lock(&kvm->slots_lock);
1324
1325 r = -EINVAL;
1326 if (log->slot >= KVM_MEMORY_SLOTS)
1327 goto out;
1328
1329 memslot = id_to_memslot(kvm->memslots, log->slot);
1330 r = -ENOENT;
1331 if (!memslot->dirty_bitmap)
1332 goto out;
1333
1334 n = kvm_dirty_bitmap_bytes(memslot);
1335 memset(memslot->dirty_bitmap, 0, n);
1336
Paul Mackerrasdfe49db2012-09-11 13:28:18 +00001337 r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap);
Paul Mackerras82ed3612011-12-15 02:03:22 +00001338 if (r)
1339 goto out;
1340
1341 r = -EFAULT;
1342 if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
1343 goto out;
1344
1345 r = 0;
1346out:
1347 mutex_unlock(&kvm->slots_lock);
1348 return r;
1349}
1350
Paul Mackerrasda9d1d72011-12-12 12:31:41 +00001351static unsigned long slb_pgsize_encoding(unsigned long psize)
1352{
1353 unsigned long senc = 0;
1354
1355 if (psize > 0x1000) {
1356 senc = SLB_VSID_L;
1357 if (psize == 0x10000)
1358 senc |= SLB_VSID_LP_01;
1359 }
1360 return senc;
1361}
1362
Paul Mackerrasa66b48c2012-09-11 13:27:46 +00001363static void unpin_slot(struct kvm_memory_slot *memslot)
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001364{
1365 unsigned long *physp;
1366 unsigned long j, npages, pfn;
1367 struct page *page;
1368
Paul Mackerrasa66b48c2012-09-11 13:27:46 +00001369 physp = memslot->arch.slot_phys;
1370 npages = memslot->npages;
1371 if (!physp)
1372 return;
1373 for (j = 0; j < npages; j++) {
1374 if (!(physp[j] & KVMPPC_GOT_PAGE))
1375 continue;
1376 pfn = physp[j] >> PAGE_SHIFT;
1377 page = pfn_to_page(pfn);
1378 SetPageDirty(page);
1379 put_page(page);
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001380 }
1381}
1382
Paul Mackerrasa66b48c2012-09-11 13:27:46 +00001383void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
1384 struct kvm_memory_slot *dont)
1385{
1386 if (!dont || free->arch.rmap != dont->arch.rmap) {
1387 vfree(free->arch.rmap);
1388 free->arch.rmap = NULL;
1389 }
1390 if (!dont || free->arch.slot_phys != dont->arch.slot_phys) {
1391 unpin_slot(free);
1392 vfree(free->arch.slot_phys);
1393 free->arch.slot_phys = NULL;
1394 }
1395}
1396
1397int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
1398 unsigned long npages)
1399{
1400 slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
1401 if (!slot->arch.rmap)
1402 return -ENOMEM;
1403 slot->arch.slot_phys = NULL;
1404
1405 return 0;
1406}
1407
1408int kvmppc_core_prepare_memory_region(struct kvm *kvm,
1409 struct kvm_memory_slot *memslot,
1410 struct kvm_userspace_memory_region *mem)
1411{
1412 unsigned long *phys;
1413
1414 /* Allocate a slot_phys array if needed */
1415 phys = memslot->arch.slot_phys;
1416 if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) {
1417 phys = vzalloc(memslot->npages * sizeof(unsigned long));
1418 if (!phys)
1419 return -ENOMEM;
1420 memslot->arch.slot_phys = phys;
1421 }
1422
1423 return 0;
1424}
1425
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001426void kvmppc_core_commit_memory_region(struct kvm *kvm,
Paul Mackerrasdfe49db2012-09-11 13:28:18 +00001427 struct kvm_userspace_memory_region *mem,
1428 struct kvm_memory_slot old)
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001429{
Paul Mackerrasdfe49db2012-09-11 13:28:18 +00001430 unsigned long npages = mem->memory_size >> PAGE_SHIFT;
1431 struct kvm_memory_slot *memslot;
1432
1433 if (npages && old.npages) {
1434 /*
1435 * If modifying a memslot, reset all the rmap dirty bits.
1436 * If this is a new memslot, we don't need to do anything
1437 * since the rmap array starts out as all zeroes,
1438 * i.e. no pages are dirty.
1439 */
1440 memslot = id_to_memslot(kvm->memslots, mem->slot);
1441 kvmppc_hv_get_dirty_log(kvm, memslot, NULL);
1442 }
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001443}
1444
Paul Mackerras32fad282012-05-04 02:32:53 +00001445static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001446{
1447 int err = 0;
1448 struct kvm *kvm = vcpu->kvm;
Alexander Grafb4e70612012-01-16 16:50:10 +01001449 struct kvmppc_linear_info *ri = NULL;
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001450 unsigned long hva;
1451 struct kvm_memory_slot *memslot;
1452 struct vm_area_struct *vma;
Paul Mackerrasda9d1d72011-12-12 12:31:41 +00001453 unsigned long lpcr, senc;
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001454 unsigned long psize, porder;
1455 unsigned long rma_size;
1456 unsigned long rmls;
1457 unsigned long *physp;
Paul Mackerrasda9d1d72011-12-12 12:31:41 +00001458 unsigned long i, npages;
Paul Mackerras2c9097e2012-09-11 13:27:01 +00001459 int srcu_idx;
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001460
1461 mutex_lock(&kvm->lock);
1462 if (kvm->arch.rma_setup_done)
1463 goto out; /* another vcpu beat us to it */
1464
Paul Mackerras32fad282012-05-04 02:32:53 +00001465 /* Allocate hashed page table (if not done already) and reset it */
1466 if (!kvm->arch.hpt_virt) {
1467 err = kvmppc_alloc_hpt(kvm, NULL);
1468 if (err) {
1469 pr_err("KVM: Couldn't alloc HPT\n");
1470 goto out;
1471 }
1472 }
1473
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001474 /* Look up the memslot for guest physical address 0 */
Paul Mackerras2c9097e2012-09-11 13:27:01 +00001475 srcu_idx = srcu_read_lock(&kvm->srcu);
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001476 memslot = gfn_to_memslot(kvm, 0);
1477
1478 /* We must have some memory at 0 by now */
1479 err = -EINVAL;
1480 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
Paul Mackerras2c9097e2012-09-11 13:27:01 +00001481 goto out_srcu;
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001482
1483 /* Look up the VMA for the start of this memory slot */
1484 hva = memslot->userspace_addr;
1485 down_read(&current->mm->mmap_sem);
1486 vma = find_vma(current->mm, hva);
1487 if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
1488 goto up_out;
1489
1490 psize = vma_kernel_pagesize(vma);
Paul Mackerrasda9d1d72011-12-12 12:31:41 +00001491 porder = __ilog2(psize);
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001492
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001493 /* Is this one of our preallocated RMAs? */
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001494 if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
1495 hva == vma->vm_start)
1496 ri = vma->vm_file->private_data;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001497
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001498 up_read(&current->mm->mmap_sem);
1499
1500 if (!ri) {
1501 /* On POWER7, use VRMA; on PPC970, give up */
1502 err = -EPERM;
1503 if (cpu_has_feature(CPU_FTR_ARCH_201)) {
1504 pr_err("KVM: CPU requires an RMO\n");
Paul Mackerras2c9097e2012-09-11 13:27:01 +00001505 goto out_srcu;
Paul Mackerras9e368f22011-06-29 00:40:08 +00001506 }
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001507
Paul Mackerrasda9d1d72011-12-12 12:31:41 +00001508 /* We can handle 4k, 64k or 16M pages in the VRMA */
1509 err = -EINVAL;
1510 if (!(psize == 0x1000 || psize == 0x10000 ||
1511 psize == 0x1000000))
Paul Mackerras2c9097e2012-09-11 13:27:01 +00001512 goto out_srcu;
Paul Mackerrasda9d1d72011-12-12 12:31:41 +00001513
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001514 /* Update VRMASD field in the LPCR */
Paul Mackerrasda9d1d72011-12-12 12:31:41 +00001515 senc = slb_pgsize_encoding(psize);
Paul Mackerras697d3892011-12-12 12:36:37 +00001516 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
1517 (VRMA_VSID << SLB_VSID_SHIFT_1T);
Paul Mackerrasda9d1d72011-12-12 12:31:41 +00001518 lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
1519 lpcr |= senc << (LPCR_VRMASD_SH - 4);
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001520 kvm->arch.lpcr = lpcr;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001521
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001522 /* Create HPTEs in the hash page table for the VRMA */
Paul Mackerrasda9d1d72011-12-12 12:31:41 +00001523 kvmppc_map_vrma(vcpu, memslot, porder);
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001524
1525 } else {
1526 /* Set up to use an RMO region */
1527 rma_size = ri->npages;
1528 if (rma_size > memslot->npages)
1529 rma_size = memslot->npages;
1530 rma_size <<= PAGE_SHIFT;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001531 rmls = lpcr_rmls(rma_size);
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001532 err = -EINVAL;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001533 if (rmls < 0) {
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001534 pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
Paul Mackerras2c9097e2012-09-11 13:27:01 +00001535 goto out_srcu;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001536 }
1537 atomic_inc(&ri->use_count);
1538 kvm->arch.rma = ri;
Paul Mackerras9e368f22011-06-29 00:40:08 +00001539
1540 /* Update LPCR and RMOR */
1541 lpcr = kvm->arch.lpcr;
1542 if (cpu_has_feature(CPU_FTR_ARCH_201)) {
1543 /* PPC970; insert RMLS value (split field) in HID4 */
1544 lpcr &= ~((1ul << HID4_RMLS0_SH) |
1545 (3ul << HID4_RMLS2_SH));
1546 lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) |
1547 ((rmls & 3) << HID4_RMLS2_SH);
1548 /* RMOR is also in HID4 */
1549 lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
1550 << HID4_RMOR_SH;
1551 } else {
1552 /* POWER7 */
1553 lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
1554 lpcr |= rmls << LPCR_RMLS_SH;
1555 kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
1556 }
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001557 kvm->arch.lpcr = lpcr;
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001558 pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001559 ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001560
1561 /* Initialize phys addrs of pages in RMO */
Paul Mackerrasda9d1d72011-12-12 12:31:41 +00001562 npages = ri->npages;
1563 porder = __ilog2(npages);
Paul Mackerrasa66b48c2012-09-11 13:27:46 +00001564 physp = memslot->arch.slot_phys;
1565 if (physp) {
1566 if (npages > memslot->npages)
1567 npages = memslot->npages;
1568 spin_lock(&kvm->arch.slot_phys_lock);
1569 for (i = 0; i < npages; ++i)
1570 physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) +
1571 porder;
1572 spin_unlock(&kvm->arch.slot_phys_lock);
1573 }
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001574 }
1575
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001576 /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
1577 smp_wmb();
1578 kvm->arch.rma_setup_done = 1;
1579 err = 0;
Paul Mackerras2c9097e2012-09-11 13:27:01 +00001580 out_srcu:
1581 srcu_read_unlock(&kvm->srcu, srcu_idx);
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001582 out:
1583 mutex_unlock(&kvm->lock);
1584 return err;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001585
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001586 up_out:
1587 up_read(&current->mm->mmap_sem);
1588 goto out;
Paul Mackerrasde56a942011-06-29 00:21:34 +00001589}
1590
1591int kvmppc_core_init_vm(struct kvm *kvm)
1592{
Paul Mackerras32fad282012-05-04 02:32:53 +00001593 unsigned long lpcr, lpid;
Paul Mackerrasde56a942011-06-29 00:21:34 +00001594
Paul Mackerras32fad282012-05-04 02:32:53 +00001595 /* Allocate the guest's logical partition ID */
1596
1597 lpid = kvmppc_alloc_lpid();
1598 if (lpid < 0)
1599 return -ENOMEM;
1600 kvm->arch.lpid = lpid;
Paul Mackerrasde56a942011-06-29 00:21:34 +00001601
David Gibson54738c02011-06-29 00:22:41 +00001602 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001603
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001604 kvm->arch.rma = NULL;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001605
Paul Mackerras9e368f22011-06-29 00:40:08 +00001606 kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001607
Paul Mackerras9e368f22011-06-29 00:40:08 +00001608 if (cpu_has_feature(CPU_FTR_ARCH_201)) {
1609 /* PPC970; HID4 is effectively the LPCR */
Paul Mackerras9e368f22011-06-29 00:40:08 +00001610 kvm->arch.host_lpid = 0;
1611 kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
1612 lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
1613 lpcr |= ((lpid >> 4) << HID4_LPID1_SH) |
1614 ((lpid & 0xf) << HID4_LPID5_SH);
1615 } else {
1616 /* POWER7; init LPCR for virtual RMA mode */
1617 kvm->arch.host_lpid = mfspr(SPRN_LPID);
1618 kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
1619 lpcr &= LPCR_PECE | LPCR_LPES;
1620 lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
Paul Mackerras697d3892011-12-12 12:36:37 +00001621 LPCR_VPM0 | LPCR_VPM1;
1622 kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
1623 (VRMA_VSID << SLB_VSID_SHIFT_1T);
Paul Mackerras9e368f22011-06-29 00:40:08 +00001624 }
1625 kvm->arch.lpcr = lpcr;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001626
Paul Mackerras342d3db2011-12-12 12:38:05 +00001627 kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001628 spin_lock_init(&kvm->arch.slot_phys_lock);
David Gibson54738c02011-06-29 00:22:41 +00001629 return 0;
Paul Mackerrasde56a942011-06-29 00:21:34 +00001630}
1631
1632void kvmppc_core_destroy_vm(struct kvm *kvm)
1633{
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001634 if (kvm->arch.rma) {
1635 kvm_release_rma(kvm->arch.rma);
1636 kvm->arch.rma = NULL;
1637 }
1638
Paul Mackerrasde56a942011-06-29 00:21:34 +00001639 kvmppc_free_hpt(kvm);
David Gibson54738c02011-06-29 00:22:41 +00001640 WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
Paul Mackerrasde56a942011-06-29 00:21:34 +00001641}
1642
/* These are stubs for now */
/* Empty stub: nothing is done for the given physical address range. */
void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
{
}
1647
/*
 * We don't need to emulate any privileged instructions or dcbz.
 * Every instruction is therefore reported as EMULATE_FAIL; *advance is
 * not touched.
 */
int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                           unsigned int inst, int *advance)
{
	return EMULATE_FAIL;
}
1654
/* Stub: no SPR write is emulated here; always returns EMULATE_FAIL. */
int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
	return EMULATE_FAIL;
}
1659
/*
 * Stub: no SPR read is emulated here; always returns EMULATE_FAIL and
 * leaves *spr_val untouched.
 */
int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
{
	return EMULATE_FAIL;
}
1664
1665static int kvmppc_book3s_hv_init(void)
1666{
1667 int r;
1668
1669 r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
1670
1671 if (r)
1672 return r;
1673
1674 r = kvmppc_mmu_hv_init();
1675
1676 return r;
1677}
1678
/* Module exit: undo the kvm_init() registration done at module init. */
static void kvmppc_book3s_hv_exit(void)
{
	kvm_exit();
}
1683
/* Register the module's load and unload entry points. */
module_init(kvmppc_book3s_hv_init);
module_exit(kvmppc_book3s_hv_exit);