/*
 * kvm nested virtualization support for s390x
 *
 * Copyright IBM Corp. 2016
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
 */
#include <linux/vmalloc.h>
#include <linux/kvm_host.h>
#include <linux/bug.h>
#include <linux/list.h>
#include <linux/bitmap.h>
#include <asm/gmap.h>
#include <asm/mmu_context.h>
#include <asm/sclp.h>
#include <asm/nmi.h>
#include "kvm-s390.h"
#include "gaccess.h"

struct vsie_page {
	struct kvm_s390_sie_block scb_s;	/* 0x0000 */
	/* the pinned original scb */
	struct kvm_s390_sie_block *scb_o;	/* 0x0200 */
	/* the shadow gmap in use by the vsie_page */
	struct gmap *gmap;			/* 0x0208 */
	__u8 reserved[0x1000 - 0x0210];		/* 0x0210 */
} __packed;
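
/*
 * Each vsie_page occupies exactly one page (4 KiB), which
 * kvm_s390_handle_vsie() asserts with a BUILD_BUG_ON; the offsets noted
 * above document where the fields live within that page.
 */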

/* trigger a validity icpt for the given scb */
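/*
 * The reason code ends up in the upper halfword of ipb; the return value
 * of 1 follows the convention used throughout this file: > 0 means
 * control has to be given back to guest 2.
 */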
static int set_validity_icpt(struct kvm_s390_sie_block *scb,
			     __u16 reason_code)
{
	scb->ipa = 0x1000;
	scb->ipb = ((__u32) reason_code) << 16;
	scb->icptcode = ICPT_VALIDITY;
	return 1;
}

/* mark the prefix as unmapped, this will block the VSIE */
static void prefix_unmapped(struct vsie_page *vsie_page)
{
	atomic_or(PROG_REQUEST, &vsie_page->scb_s.prog20);
}

/* mark the prefix as unmapped and wait until the VSIE has been left */
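/*
 * Setting PROG_REQUEST keeps the shadow scb from (re)entering SIE; if the
 * VCPU is currently inside SIE (PROG_IN_SIE), a STOP intervention request
 * kicks it out, and we busy-wait until it has actually left.
 */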
static void prefix_unmapped_sync(struct vsie_page *vsie_page)
{
	prefix_unmapped(vsie_page);
	if (vsie_page->scb_s.prog0c & PROG_IN_SIE)
		atomic_or(CPUSTAT_STOP_INT, &vsie_page->scb_s.cpuflags);
	while (vsie_page->scb_s.prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* mark the prefix as mapped, this will allow the VSIE to run */
static void prefix_mapped(struct vsie_page *vsie_page)
{
	atomic_andnot(PROG_REQUEST, &vsie_page->scb_s.prog20);
}


/* copy the updated intervention request bits into the shadow scb */
static void update_intervention_requests(struct vsie_page *vsie_page)
{
	const int bits = CPUSTAT_STOP_INT | CPUSTAT_IO_INT | CPUSTAT_EXT_INT;
	int cpuflags;

	cpuflags = atomic_read(&vsie_page->scb_o->cpuflags);
	atomic_andnot(bits, &vsie_page->scb_s.cpuflags);
	atomic_or(cpuflags & bits, &vsie_page->scb_s.cpuflags);
}

/* shadow (filter and validate) the cpuflags */
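/*
 * Only flags we know how to handle make it into the shadow: newflags
 * starts out as CPUSTAT_ZARCH only; the intervention request bits are
 * merged in later by update_intervention_requests().
 */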
static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	int newflags, cpuflags = atomic_read(&scb_o->cpuflags);

	/* we don't allow ESA/390 guests */
	if (!(cpuflags & CPUSTAT_ZARCH))
		return set_validity_icpt(scb_s, 0x0001U);

	if (cpuflags & (CPUSTAT_RRF | CPUSTAT_MCDS))
		return set_validity_icpt(scb_s, 0x0001U);
	else if (cpuflags & (CPUSTAT_SLSV | CPUSTAT_SLSR))
		return set_validity_icpt(scb_s, 0x0007U);

	/* intervention requests will be set later */
	newflags = CPUSTAT_ZARCH;

	atomic_set(&scb_s->cpuflags, newflags);
	return 0;
}

/* unshadow the scb, copying parameters back to the real scb */
static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;

	/* interception */
	scb_o->icptcode = scb_s->icptcode;
	scb_o->icptstatus = scb_s->icptstatus;
	scb_o->ipa = scb_s->ipa;
	scb_o->ipb = scb_s->ipb;
	scb_o->gbea = scb_s->gbea;

	/* timer */
	scb_o->cputm = scb_s->cputm;
	scb_o->ckc = scb_s->ckc;
	scb_o->todpr = scb_s->todpr;

	/* guest state */
	scb_o->gpsw = scb_s->gpsw;
	scb_o->gg14 = scb_s->gg14;
	scb_o->gg15 = scb_s->gg15;
	memcpy(scb_o->gcr, scb_s->gcr, 128);
	scb_o->pp = scb_s->pp;

	/* interrupt intercept */
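	/*
	 * Forward the interception parameters stored at offsets 0xc0 - 0xef
	 * of the control block; for the partial execution (MVPG) case only
	 * bytes 0xc0 - 0xcf are relevant.
	 */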
	switch (scb_s->icptcode) {
	case ICPT_PROGI:
	case ICPT_INSTPROGI:
	case ICPT_EXTINT:
		memcpy((void *)((u64)scb_o + 0xc0),
		       (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
		break;
	case ICPT_PARTEXEC:
		/* MVPG only */
		memcpy((void *)((u64)scb_o + 0xc0),
		       (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0);
		break;
	}

	if (scb_s->ihcpu != 0xffffU)
		scb_o->ihcpu = scb_s->ihcpu;
}

/*
 * Setup the shadow scb by copying and checking the relevant parts of the g2
 * provided scb.
 *
 * Returns: - 0 if the scb has been shadowed
 *          - > 0 if control has to be given to guest 2
 */
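/*
 * Fields not explicitly copied below stay zeroed in the shadow scb (they
 * are reset at the top of the function so no leftovers survive when a
 * vsie_page is reused), and control bits such as eca are only forwarded
 * selectively.
 */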
static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	int rc;

	/* make sure we don't have any leftovers when reusing the scb */
	scb_s->icptcode = 0;
	scb_s->eca = 0;
	scb_s->ecb = 0;
	scb_s->ecb2 = 0;
	scb_s->ecb3 = 0;
	scb_s->ecd = 0;

	rc = prepare_cpuflags(vcpu, vsie_page);
	if (rc)
		goto out;

	/* timer */
	scb_s->cputm = scb_o->cputm;
	scb_s->ckc = scb_o->ckc;
	scb_s->todpr = scb_o->todpr;
	scb_s->epoch = scb_o->epoch;

	/* guest state */
	scb_s->gpsw = scb_o->gpsw;
	scb_s->gg14 = scb_o->gg14;
	scb_s->gg15 = scb_o->gg15;
	memcpy(scb_s->gcr, scb_o->gcr, 128);
	scb_s->pp = scb_o->pp;

	/* interception / execution handling */
	scb_s->gbea = scb_o->gbea;
	scb_s->lctl = scb_o->lctl;
	scb_s->svcc = scb_o->svcc;
	scb_s->ictl = scb_o->ictl;
	/*
	 * SKEY handling functions can't deal with false setting of PTE invalid
	 * bits. Therefore we cannot provide interpretation and would later
	 * have to provide our own emulation handlers.
	 */
	scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
	scb_s->icpua = scb_o->icpua;

	/* SIE will do mso/msl validity and exception checks for us */
	scb_s->msl = scb_o->msl & 0xfffffffffff00000UL;
	scb_s->mso = scb_o->mso & 0xfffffffffff00000UL;
	scb_s->prefix = scb_o->prefix;

	/* We have to definitely flush the tlb if this scb never ran */
	if (scb_s->ihcpu != 0xffffU)
		scb_s->ihcpu = scb_o->ihcpu;

	/* MVPG and Protection Exception Interpretation are always available */
	scb_s->eca |= scb_o->eca & 0x01002000U;

out:
	if (rc)
		unshadow_scb(vcpu, vsie_page);
	return rc;
}

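/*
 * Called via the gmap notifier when (a part of) a shadow gmap gets
 * invalidated. If the range overlaps a shadowed prefix, the prefix is
 * marked unmapped, which keeps the VSIE from running until map_prefix()
 * has remapped it.
 */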
void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
				 unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct vsie_page *cur;
	unsigned long prefix;
	struct page *page;
	int i;

	if (!gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;

	/*
	 * Only new shadow blocks are added to the list during runtime,
	 * therefore we can safely reference them all the time.
	 */
	for (i = 0; i < kvm->arch.vsie.page_count; i++) {
		page = READ_ONCE(kvm->arch.vsie.pages[i]);
		if (!page)
			continue;
		cur = page_to_virt(page);
		if (READ_ONCE(cur->gmap) != gmap)
			continue;
		prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
		/* with mso/msl, the prefix lies at an offset */
		prefix += cur->scb_s.mso;
		if (prefix <= end && start <= prefix + PAGE_SIZE - 1)
			prefix_unmapped_sync(cur);
	}
}

/*
 * Map the first prefix page.
 *
 * The prefix will be protected, a gmap notifier will inform about unmaps.
 * The shadow scb must not be executed until the prefix is remapped, this is
 * guaranteed by properly handling PROG_REQUEST.
 *
 * Returns: - 0 if successfully mapped or already mapped
 *          - > 0 if control has to be given to guest 2
 *          - -EAGAIN if the caller can retry immediately
 *          - -ENOMEM if out of memory
 */
static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	u64 prefix = scb_s->prefix << GUEST_PREFIX_SHIFT;
	int rc;

	/* mark it as mapped so we can catch any concurrent unmappers */
	prefix_mapped(vsie_page);

	/* with mso/msl, the prefix lies at offset *mso* */
	prefix += scb_s->mso;

	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
	/*
	 * We don't have to mprotect, we will be called for all unshadows.
	 * SIE will detect if protection applies and trigger a validity.
	 */
	if (rc)
		prefix_unmapped(vsie_page);
	if (rc > 0 || rc == -EFAULT)
		rc = set_validity_icpt(scb_s, 0x0037U);
	return rc;
}

/*
 * Pin the guest page given by gpa and set hpa to the pinned host address.
 * Will always be pinned writable.
 *
 * Returns: - 0 on success
 *          - -EINVAL if the gpa is not valid guest storage
 *          - -ENOMEM if out of memory
 */
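/*
 * Note that the "host address" handed back is the address of the page in
 * the kernel mapping (page_to_virt()) plus the offset within the page, and
 * that the page is pinned writable (get_user_pages_fast() with write = 1).
 */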
static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
{
	struct page *page;
	hva_t hva;
	int rc;

	hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
	if (kvm_is_error_hva(hva))
		return -EINVAL;
	rc = get_user_pages_fast(hva, 1, 1, &page);
	if (rc < 0)
		return rc;
	else if (rc != 1)
		return -ENOMEM;
	*hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK);
	return 0;
}

/* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */
static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
{
	struct page *page;

	page = virt_to_page(hpa);
	set_page_dirty_lock(page);
	put_page(page);
	/* mark the page always as dirty for migration */
	mark_page_dirty(kvm, gpa_to_gfn(gpa));
}

/* unpin all blocks previously pinned by pin_blocks(), marking them dirty */
static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	hpa_t hpa;
	gpa_t gpa;

	hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol;
	if (hpa) {
		gpa = scb_o->scaol & ~0xfUL;
		unpin_guest_page(vcpu->kvm, gpa, hpa);
		scb_s->scaol = 0;
		scb_s->scaoh = 0;
	}
}

/*
 * Instead of shadowing some blocks, we can simply forward them because the
 * addresses in the scb are 64 bit long.
 *
 * This works as long as the data lies in one page. If blocks ever exceed one
 * page, we have to fall back to shadowing.
 *
 * As we reuse the sca, the vcpu pointers contained in it are invalid. We must
 * therefore not enable any facilities that access these pointers (e.g. SIGPIF).
 *
 * Returns: - 0 if all blocks were pinned.
 *          - > 0 if control has to be given to guest 2
 *          - -ENOMEM if out of memory
 */
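/*
 * The sca origin is rejected with a validity intercept if it lies in the
 * first 8 KiB (0x0038), if it falls into the vcpu's prefix pages (0x0011),
 * if the bsca_block would cross a page boundary (0x003b), or if the page
 * cannot be pinned (0x0034).
 */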
static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	hpa_t hpa;
	gpa_t gpa;
	int rc = 0;

	gpa = scb_o->scaol & ~0xfUL;
	if (gpa) {
		if (!(gpa & ~0x1fffUL))
			rc = set_validity_icpt(scb_s, 0x0038U);
		else if ((gpa & ~0x1fffUL) == kvm_s390_get_prefix(vcpu))
			rc = set_validity_icpt(scb_s, 0x0011U);
		else if ((gpa & PAGE_MASK) !=
			 ((gpa + sizeof(struct bsca_block) - 1) & PAGE_MASK))
			rc = set_validity_icpt(scb_s, 0x003bU);
		if (!rc) {
			rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
			if (rc == -EINVAL)
				rc = set_validity_icpt(scb_s, 0x0034U);
		}
		if (rc)
			goto unpin;
		scb_s->scaoh = (u32)((u64)hpa >> 32);
		scb_s->scaol = (u32)(u64)hpa;
	}
	return 0;
unpin:
	unpin_blocks(vcpu, vsie_page);
	return rc;
}

/* unpin the scb provided by guest 2, marking it as dirty */
static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
		      gpa_t gpa)
{
	hpa_t hpa = (hpa_t) vsie_page->scb_o;

	if (hpa)
		unpin_guest_page(vcpu->kvm, gpa, hpa);
	vsie_page->scb_o = NULL;
}

/*
 * Pin the scb at gpa provided by guest 2 at vsie_page->scb_o.
 *
 * Returns: - 0 if the scb was pinned.
 *          - > 0 if control has to be given to guest 2
 *          - -ENOMEM if out of memory
 */
static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
		   gpa_t gpa)
{
	hpa_t hpa;
	int rc;

	rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
	if (rc == -EINVAL) {
		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
		if (!rc)
			rc = 1;
	}
	if (!rc)
		vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa;
	return rc;
}

/*
 * Inject a fault into guest 2.
 *
 * Returns: - > 0 if control has to be given to guest 2
 *            < 0 if an error occurred during injection.
 */
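/*
 * Worked example for the trans_exc_code arithmetic below: for a store
 * fault (write_flag == true) the access indication is (!1 + 1) << 10 =
 * 0x400, for a fetch fault it is (!0 + 1) << 10 = 0x800; either value is
 * OR'ed into the page-aligned virtual address.
 */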
static int inject_fault(struct kvm_vcpu *vcpu, __u16 code, __u64 vaddr,
			bool write_flag)
{
	struct kvm_s390_pgm_info pgm = {
		.code = code,
		.trans_exc_code =
			/* 0-51: virtual address */
			(vaddr & 0xfffffffffffff000UL) |
			/* 52-53: store / fetch */
			(((unsigned int) !write_flag) + 1) << 10,
			/* 62-63: asce id (always primary == 0) */
		.exc_access_id = 0, /* always primary */
		.op_access_id = 0, /* not MVPG */
	};
	int rc;

	if (code == PGM_PROTECTION)
		pgm.trans_exc_code |= 0x4UL;

	rc = kvm_s390_inject_prog_irq(vcpu, &pgm);
	return rc ? rc : 1;
}

/*
 * Handle a fault during vsie execution on a gmap shadow.
 *
 * Returns: - 0 if the fault was resolved
 *          - > 0 if control has to be given to guest 2
 *          - < 0 if an error occurred
 */
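/*
 * Protection exceptions are forwarded to guest 2 as is. Everything else is
 * first given to kvm_s390_shadow_fault(); a positive return value from it
 * is a program interruption code, which is then injected via inject_fault().
 */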
static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	int rc;

	if (current->thread.gmap_int_code == PGM_PROTECTION)
		/* we can directly forward all protection exceptions */
		return inject_fault(vcpu, PGM_PROTECTION,
				    current->thread.gmap_addr, 1);

	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
				   current->thread.gmap_addr);
	if (rc > 0) {
		rc = inject_fault(vcpu, rc,
				  current->thread.gmap_addr,
				  current->thread.gmap_write_flag);
	}
	return rc;
}

static inline void clear_vsie_icpt(struct vsie_page *vsie_page)
{
	vsie_page->scb_s.icptcode = 0;
}

/*
 * Run the vsie on a shadow scb and a shadow gmap, without any further
 * sanity checks, handling SIE faults.
 *
 * Returns: - 0 everything went fine
 *          - > 0 if control has to be given to guest 2
 *          - < 0 if an error occurred
 */
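/*
 * The SRCU read lock is dropped across the actual SIE entry, and
 * kvm_guest_enter()/kvm_guest_exit() are called with interrupts disabled
 * around sie64a(). A positive return from sie64a() is treated as "no
 * error, but there may still be an interception to handle".
 */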
static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	int rc;

	if (need_resched())
		schedule();
	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	local_irq_disable();
	kvm_guest_enter();
	local_irq_enable();

	rc = sie64a(scb_s, vcpu->run->s.regs.gprs);

	local_irq_disable();
	kvm_guest_exit();
	local_irq_enable();
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	if (rc > 0)
		rc = 0; /* we could still have an icpt */
	else if (rc == -EFAULT)
		return handle_fault(vcpu, vsie_page);

	switch (scb_s->icptcode) {
	case ICPT_STOP:
		/* stop not requested by g2 - must have been a kick */
		if (!(atomic_read(&scb_o->cpuflags) & CPUSTAT_STOP_INT))
			clear_vsie_icpt(vsie_page);
		break;
	case ICPT_VALIDITY:
		if ((scb_s->ipa & 0xf000) != 0xf000)
			scb_s->ipa += 0x1000;
		break;
	}
	return rc;
}

static void release_gmap_shadow(struct vsie_page *vsie_page)
{
	if (vsie_page->gmap)
		gmap_put(vsie_page->gmap);
	WRITE_ONCE(vsie_page->gmap, NULL);
}

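/*
 * Create a shadow gmap for the current guest 2 DAT configuration. The
 * requested edat level is derived from CR0.edat and the EDAT1/EDAT2
 * facilities (8 and 78) made available to the guest.
 */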
static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
			       struct vsie_page *vsie_page)
{
	unsigned long asce;
	union ctlreg0 cr0;
	struct gmap *gmap;
	int edat;

	asce = vcpu->arch.sie_block->gcr[1];
	cr0.val = vcpu->arch.sie_block->gcr[0];
	edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
	edat += edat && test_kvm_facility(vcpu->kvm, 78);

	gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
	if (IS_ERR(gmap))
		return PTR_ERR(gmap);
	gmap->private = vcpu->kvm;
	WRITE_ONCE(vsie_page->gmap, gmap);
	return 0;
}

/*
 * Run the vsie on a shadowed scb, managing the gmap shadow, handling
 * prefix pages and faults.
 *
 * Returns: - 0 if no errors occurred
 *          - > 0 if control has to be given to guest 2
 *          - -ENOMEM if out of memory
 */
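/*
 * The loop below keeps re-entering the VSIE until something needs the
 * caller's attention: an error, an interception that must be handled or
 * forwarded, a pending signal, or a pending interrupt for this VCPU.
 */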
static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	int rc = 0;

	while (1) {
		rc = acquire_gmap_shadow(vcpu, vsie_page);
		if (!rc)
			rc = map_prefix(vcpu, vsie_page);
		if (!rc) {
			gmap_enable(vsie_page->gmap);
			update_intervention_requests(vsie_page);
			rc = do_vsie_run(vcpu, vsie_page);
			gmap_enable(vcpu->arch.gmap);
		}
		release_gmap_shadow(vsie_page);

		if (rc == -EAGAIN)
			rc = 0;
		if (rc || scb_s->icptcode || signal_pending(current) ||
		    kvm_s390_vcpu_has_irq(vcpu, 0))
			break;
	}

	if (rc == -EFAULT) {
		/*
		 * Addressing exceptions are always presented as intercepts.
		 * As addressing exceptions are suppressing and our guest 3 PSW
		 * points at the responsible instruction, we have to
		 * forward the PSW and set the ilc. If we can't read guest 3
		 * instruction, we can use an arbitrary ilc. Let's always use
		 * ilen = 4 for now, so we can avoid reading in guest 3 virtual
		 * memory. (we could also fake the shadow so the hardware
		 * handles it).
		 */
		scb_s->icptcode = ICPT_PROGI;
		scb_s->iprcc = PGM_ADDRESSING;
		scb_s->pgmilc = 4;
		scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4);
	}
	return rc;
}

/*
 * Get or create a vsie page for a scb address.
 *
 * Returns: - address of a vsie page (cached or new one)
 *          - NULL if the same scb address is already used by another VCPU
 *          - ERR_PTR(-ENOMEM) if out of memory
 */
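/*
 * Pages are cached in a radix tree keyed by scb address >> 9 (the scb is
 * 512 byte aligned). A page reference count of 1 means "cached but unused",
 * 2 means "in use by a VCPU"; this is what the page_ref_inc_return() == 2
 * checks below rely on.
 */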
static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
{
	struct vsie_page *vsie_page;
	struct page *page;
	int nr_vcpus;

	rcu_read_lock();
	page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
	rcu_read_unlock();
	if (page) {
		if (page_ref_inc_return(page) == 2)
			return page_to_virt(page);
		page_ref_dec(page);
	}

	/*
	 * We want at least #online_vcpus shadows, so every VCPU can execute
	 * the VSIE in parallel.
	 */
	nr_vcpus = atomic_read(&kvm->online_vcpus);

	mutex_lock(&kvm->arch.vsie.mutex);
	if (kvm->arch.vsie.page_count < nr_vcpus) {
		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!page) {
			mutex_unlock(&kvm->arch.vsie.mutex);
			return ERR_PTR(-ENOMEM);
		}
		page_ref_inc(page);
		kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page;
		kvm->arch.vsie.page_count++;
	} else {
		/* reuse an existing entry that belongs to nobody */
		while (true) {
			page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
			if (page_ref_inc_return(page) == 2)
				break;
			page_ref_dec(page);
			kvm->arch.vsie.next++;
			kvm->arch.vsie.next %= nr_vcpus;
		}
		radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
	}
	page->index = addr;
	/* double use of the same address */
	if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) {
		page_ref_dec(page);
		mutex_unlock(&kvm->arch.vsie.mutex);
		return NULL;
	}
	mutex_unlock(&kvm->arch.vsie.mutex);

	vsie_page = page_to_virt(page);
	memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
	vsie_page->scb_s.ihcpu = 0xffffU;
	return vsie_page;
}

/* put a vsie page acquired via get_vsie_page */
static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page)
{
	struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT);

	page_ref_dec(page);
}

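/*
 * Intercept handler for the SIE instruction executed by guest 2: pin the
 * guest 2 scb, build the shadow scb, pin any forwarded blocks, run the
 * VSIE and unwind everything again, copying the results back on the way.
 */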
int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
{
	struct vsie_page *vsie_page;
	unsigned long scb_addr;
	int rc;

	vcpu->stat.instruction_sie++;
	if (!test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIEF2))
		return -EOPNOTSUPP;
	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);

	BUILD_BUG_ON(sizeof(struct vsie_page) != 4096);
	scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL);

	/* 512 byte alignment */
	if (unlikely(scb_addr & 0x1ffUL))
		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);

	if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;

	vsie_page = get_vsie_page(vcpu->kvm, scb_addr);
	if (IS_ERR(vsie_page))
		return PTR_ERR(vsie_page);
	else if (!vsie_page)
		/* double use of sie control block - simply do nothing */
		return 0;

	rc = pin_scb(vcpu, vsie_page, scb_addr);
	if (rc)
		goto out_put;
	rc = shadow_scb(vcpu, vsie_page);
	if (rc)
		goto out_unpin_scb;
	rc = pin_blocks(vcpu, vsie_page);
	if (rc)
		goto out_unshadow;
	rc = vsie_run(vcpu, vsie_page);
	unpin_blocks(vcpu, vsie_page);
out_unshadow:
	unshadow_scb(vcpu, vsie_page);
out_unpin_scb:
	unpin_scb(vcpu, vsie_page, scb_addr);
out_put:
	put_vsie_page(vcpu->kvm, vsie_page);

	return rc < 0 ? rc : 0;
}

/* Init the vsie data structures. To be called when a vm is initialized. */
void kvm_s390_vsie_init(struct kvm *kvm)
{
	mutex_init(&kvm->arch.vsie.mutex);
	INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL);
}

/* Destroy the vsie data structures. To be called when a vm is destroyed. */
void kvm_s390_vsie_destroy(struct kvm *kvm)
{
	struct page *page;
	int i;

	mutex_lock(&kvm->arch.vsie.mutex);
	for (i = 0; i < kvm->arch.vsie.page_count; i++) {
		page = kvm->arch.vsie.pages[i];
		kvm->arch.vsie.pages[i] = NULL;
		/* free the radix tree entry */
		radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
		__free_page(page);
	}
	kvm->arch.vsie.page_count = 0;
	mutex_unlock(&kvm->arch.vsie.mutex);
}