/*
 * kvm nested virtualization support for s390x
 *
 * Copyright IBM Corp. 2016
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
 */
#include <linux/vmalloc.h>
#include <linux/kvm_host.h>
#include <linux/bug.h>
#include <linux/list.h>
#include <linux/bitmap.h>
#include <asm/gmap.h>
#include <asm/mmu_context.h>
#include <asm/sclp.h>
#include <asm/nmi.h>
#include "kvm-s390.h"
#include "gaccess.h"

struct vsie_page {
	struct kvm_s390_sie_block scb_s;	/* 0x0000 */
	/* the pinned original scb */
	struct kvm_s390_sie_block *scb_o;	/* 0x0200 */
	/* the shadow gmap in use by the vsie_page */
	struct gmap *gmap;			/* 0x0208 */
	__u8 reserved[0x1000 - 0x0210];		/* 0x0210 */
} __packed;
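
/*
 * Note: the whole vsie_page is designed to fit into a single 4k page;
 * kvm_s390_handle_vsie() below enforces this with a BUILD_BUG_ON, and
 * get_vsie_page()/put_vsie_page() manage these pages via their struct page.
 */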

/* trigger a validity icpt for the given scb */
static int set_validity_icpt(struct kvm_s390_sie_block *scb,
			     __u16 reason_code)
{
	scb->ipa = 0x1000;
	scb->ipb = ((__u32) reason_code) << 16;
	scb->icptcode = ICPT_VALIDITY;
	return 1;
}
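
/*
 * Note: a validity interception is reported with ipa 0x1000 and the reason
 * code in the upper halfword of ipb. The return value of 1 matches the
 * "> 0: give control back to guest 2" convention used by the callers.
 */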

/* mark the prefix as unmapped, this will block the VSIE */
static void prefix_unmapped(struct vsie_page *vsie_page)
{
	atomic_or(PROG_REQUEST, &vsie_page->scb_s.prog20);
}

/* mark the prefix as unmapped and wait until the VSIE has been left */
static void prefix_unmapped_sync(struct vsie_page *vsie_page)
{
	prefix_unmapped(vsie_page);
	if (vsie_page->scb_s.prog0c & PROG_IN_SIE)
		atomic_or(CPUSTAT_STOP_INT, &vsie_page->scb_s.cpuflags);
	while (vsie_page->scb_s.prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* mark the prefix as mapped, this will allow the VSIE to run */
static void prefix_mapped(struct vsie_page *vsie_page)
{
	atomic_andnot(PROG_REQUEST, &vsie_page->scb_s.prog20);
}

/* test if the prefix is mapped into the gmap shadow */
static int prefix_is_mapped(struct vsie_page *vsie_page)
{
	return !(atomic_read(&vsie_page->scb_s.prog20) & PROG_REQUEST);
}
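
/*
 * Note on the handshake above: PROG_REQUEST in prog20 blocks (re)entry into
 * the VSIE, while PROG_IN_SIE in prog0c tells us that a CPU is currently
 * executing the shadow scb. prefix_unmapped_sync() therefore kicks such a
 * CPU with a STOP intervention request and busy-waits until SIE has been
 * left.
 */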

/* copy the updated intervention request bits into the shadow scb */
static void update_intervention_requests(struct vsie_page *vsie_page)
{
	const int bits = CPUSTAT_STOP_INT | CPUSTAT_IO_INT | CPUSTAT_EXT_INT;
	int cpuflags;

	cpuflags = atomic_read(&vsie_page->scb_o->cpuflags);
	atomic_andnot(bits, &vsie_page->scb_s.cpuflags);
	atomic_or(cpuflags & bits, &vsie_page->scb_s.cpuflags);
}

/* shadow (filter and validate) the cpuflags */
static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	int newflags, cpuflags = atomic_read(&scb_o->cpuflags);

	/* we don't allow ESA/390 guests */
	if (!(cpuflags & CPUSTAT_ZARCH))
		return set_validity_icpt(scb_s, 0x0001U);

	if (cpuflags & (CPUSTAT_RRF | CPUSTAT_MCDS))
		return set_validity_icpt(scb_s, 0x0001U);
	else if (cpuflags & (CPUSTAT_SLSV | CPUSTAT_SLSR))
		return set_validity_icpt(scb_s, 0x0007U);

	/* intervention requests will be set later */
	newflags = CPUSTAT_ZARCH;

	atomic_set(&scb_s->cpuflags, newflags);
	return 0;
}
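
/*
 * Note: only the z/Architecture mode indication is carried over here. The
 * intervention request bits are refreshed from the original scb right before
 * every SIE entry by update_intervention_requests().
 */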

/* unshadow the scb, copying parameters back to the real scb */
static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;

	/* interception */
	scb_o->icptcode = scb_s->icptcode;
	scb_o->icptstatus = scb_s->icptstatus;
	scb_o->ipa = scb_s->ipa;
	scb_o->ipb = scb_s->ipb;
	scb_o->gbea = scb_s->gbea;

	/* timer */
	scb_o->cputm = scb_s->cputm;
	scb_o->ckc = scb_s->ckc;
	scb_o->todpr = scb_s->todpr;

	/* guest state */
	scb_o->gpsw = scb_s->gpsw;
	scb_o->gg14 = scb_s->gg14;
	scb_o->gg15 = scb_s->gg15;
	memcpy(scb_o->gcr, scb_s->gcr, 128);
	scb_o->pp = scb_s->pp;

	/* interrupt intercept */
	switch (scb_s->icptcode) {
	case ICPT_PROGI:
	case ICPT_INSTPROGI:
	case ICPT_EXTINT:
		memcpy((void *)((u64)scb_o + 0xc0),
		       (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
		break;
	case ICPT_PARTEXEC:
		/* MVPG only */
		memcpy((void *)((u64)scb_o + 0xc0),
		       (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0);
		break;
	}

	if (scb_s->ihcpu != 0xffffU)
		scb_o->ihcpu = scb_s->ihcpu;
}

/*
 * Set up the shadow scb by copying and checking the relevant parts of the g2
 * provided scb.
 *
 * Returns: - 0 if the scb has been shadowed
 *          - > 0 if control has to be given to guest 2
 */
static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	unsigned long new_mso;
	int rc;

	/* make sure we don't have any leftovers when reusing the scb */
	scb_s->icptcode = 0;
	scb_s->eca = 0;
	scb_s->ecb = 0;
	scb_s->ecb2 = 0;
	scb_s->ecb3 = 0;
	scb_s->ecd = 0;

	rc = prepare_cpuflags(vcpu, vsie_page);
	if (rc)
		goto out;

	/* timer */
	scb_s->cputm = scb_o->cputm;
	scb_s->ckc = scb_o->ckc;
	scb_s->todpr = scb_o->todpr;
	scb_s->epoch = scb_o->epoch;

	/* guest state */
	scb_s->gpsw = scb_o->gpsw;
	scb_s->gg14 = scb_o->gg14;
	scb_s->gg15 = scb_o->gg15;
	memcpy(scb_s->gcr, scb_o->gcr, 128);
	scb_s->pp = scb_o->pp;

	/* interception / execution handling */
	scb_s->gbea = scb_o->gbea;
	scb_s->lctl = scb_o->lctl;
	scb_s->svcc = scb_o->svcc;
	scb_s->ictl = scb_o->ictl;
	/*
	 * SKEY handling functions can't deal with false setting of PTE invalid
	 * bits. Therefore we cannot provide interpretation and would later
	 * have to provide our own emulation handlers.
	 */
	scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
	scb_s->icpua = scb_o->icpua;

	new_mso = scb_o->mso & 0xfffffffffff00000UL;
	/* if the hva of the prefix changes, we have to remap the prefix */
	if (scb_s->mso != new_mso || scb_s->prefix != scb_o->prefix)
		prefix_unmapped(vsie_page);
	/* SIE will do mso/msl validity and exception checks for us */
	scb_s->msl = scb_o->msl & 0xfffffffffff00000UL;
	scb_s->mso = new_mso;
	scb_s->prefix = scb_o->prefix;

	/* We definitely have to flush the TLB if this scb never ran */
	if (scb_s->ihcpu != 0xffffU)
		scb_s->ihcpu = scb_o->ihcpu;

	/* MVPG and Protection Exception Interpretation are always available */
	scb_s->eca |= scb_o->eca & 0x01002000U;

out:
	if (rc)
		unshadow_scb(vcpu, vsie_page);
	return rc;
}

void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
				 unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct vsie_page *cur;
	unsigned long prefix;
	struct page *page;
	int i;

	if (!gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;

	/*
	 * Only new shadow blocks are added to the list during runtime,
	 * therefore we can safely reference them all the time.
	 */
	for (i = 0; i < kvm->arch.vsie.page_count; i++) {
		page = READ_ONCE(kvm->arch.vsie.pages[i]);
		if (!page)
			continue;
		cur = page_to_virt(page);
		if (READ_ONCE(cur->gmap) != gmap)
			continue;
		prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
		/* with mso/msl, the prefix lies at an offset */
		prefix += cur->scb_s.mso;
		if (prefix <= end && start <= prefix + PAGE_SIZE - 1)
			prefix_unmapped_sync(cur);
	}
}

/*
 * Map the first prefix page.
 *
 * The prefix will be protected, a gmap notifier will inform about unmaps.
 * The shadow scb must not be executed until the prefix is remapped, this is
 * guaranteed by properly handling PROG_REQUEST.
 *
 * Returns: - 0 if successfully mapped or already mapped
 *          - > 0 if control has to be given to guest 2
 *          - -EAGAIN if the caller can retry immediately
 *          - -ENOMEM if out of memory
 */
static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	u64 prefix = scb_s->prefix << GUEST_PREFIX_SHIFT;
	int rc;

	if (prefix_is_mapped(vsie_page))
		return 0;

	/* mark it as mapped so we can catch any concurrent unmappers */
	prefix_mapped(vsie_page);

	/* with mso/msl, the prefix lies at offset *mso* */
	prefix += scb_s->mso;

	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
	/*
	 * We don't have to mprotect, we will be called for all unshadows.
	 * SIE will detect if protection applies and trigger a validity.
	 */
	if (rc)
		prefix_unmapped(vsie_page);
	if (rc > 0 || rc == -EFAULT)
		rc = set_validity_icpt(scb_s, 0x0037U);
	return rc;
}
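
/*
 * Note: kvm_s390_shadow_fault() maps the prefix page into the shadow gmap
 * with protection; when that mapping is invalidated later on,
 * kvm_s390_vsie_gmap_notifier() marks the prefix as unmapped again, and the
 * next iteration of vsie_run() remaps it before re-entering the VSIE.
 */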

/*
 * Pin the guest page given by gpa and set hpa to the pinned host address.
 * Will always be pinned writable.
 *
 * Returns: - 0 on success
 *          - -EINVAL if the gpa is not valid guest storage
 *          - -ENOMEM if out of memory
 */
static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
{
	struct page *page;
	hva_t hva;
	int rc;

	hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
	if (kvm_is_error_hva(hva))
		return -EINVAL;
	rc = get_user_pages_fast(hva, 1, 1, &page);
	if (rc < 0)
		return rc;
	else if (rc != 1)
		return -ENOMEM;
	*hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK);
	return 0;
}

/* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */
static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
{
	struct page *page;

	page = virt_to_page(hpa);
	set_page_dirty_lock(page);
	put_page(page);
	/* always mark the page as dirty for migration */
	mark_page_dirty(kvm, gpa_to_gfn(gpa));
}

/* unpin all blocks previously pinned by pin_blocks(), marking them dirty */
static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	hpa_t hpa;
	gpa_t gpa;

	hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol;
	if (hpa) {
		gpa = scb_o->scaol & ~0xfUL;
		unpin_guest_page(vcpu->kvm, gpa, hpa);
		scb_s->scaol = 0;
		scb_s->scaoh = 0;
	}
}

/*
 * Instead of shadowing some blocks, we can simply forward them because the
 * addresses in the scb are 64 bit long.
 *
 * This works as long as the data lies in one page. If blocks ever exceed one
 * page, we have to fall back to shadowing.
 *
 * As we reuse the sca, the vcpu pointers contained in it are invalid. We must
 * therefore not enable any facilities that access these pointers (e.g. SIGPIF).
 *
 * Returns: - 0 if all blocks were pinned.
 *          - > 0 if control has to be given to guest 2
 *          - -ENOMEM if out of memory
 */
static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	hpa_t hpa;
	gpa_t gpa;
	int rc = 0;

	gpa = scb_o->scaol & ~0xfUL;
	if (gpa) {
		/* SCA origin within the first 8k of guest storage */
		if (!(gpa & ~0x1fffUL))
			rc = set_validity_icpt(scb_s, 0x0038U);
		/* SCA overlapping the guest 2 prefix area */
		else if ((gpa & ~0x1fffUL) == kvm_s390_get_prefix(vcpu))
			rc = set_validity_icpt(scb_s, 0x0011U);
		/* the basic SCA must not cross a page boundary */
		else if ((gpa & PAGE_MASK) !=
			 ((gpa + sizeof(struct bsca_block) - 1) & PAGE_MASK))
			rc = set_validity_icpt(scb_s, 0x003bU);
		if (!rc) {
			rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
			if (rc == -EINVAL)
				rc = set_validity_icpt(scb_s, 0x0034U);
		}
		if (rc)
			goto unpin;
		scb_s->scaoh = (u32)((u64)hpa >> 32);
		scb_s->scaol = (u32)(u64)hpa;
	}
	return 0;
unpin:
	unpin_blocks(vcpu, vsie_page);
	return rc;
}

/* unpin the scb provided by guest 2, marking it as dirty */
static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
		      gpa_t gpa)
{
	hpa_t hpa = (hpa_t) vsie_page->scb_o;

	if (hpa)
		unpin_guest_page(vcpu->kvm, gpa, hpa);
	vsie_page->scb_o = NULL;
}

/*
 * Pin the scb at gpa provided by guest 2 at vsie_page->scb_o.
 *
 * Returns: - 0 if the scb was pinned.
 *          - > 0 if control has to be given to guest 2
 *          - -ENOMEM if out of memory
 */
static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
		   gpa_t gpa)
{
	hpa_t hpa;
	int rc;

	rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
	if (rc == -EINVAL) {
		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
		if (!rc)
			rc = 1;
	}
	if (!rc)
		vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa;
	return rc;
}

/*
 * Inject a fault into guest 2.
 *
 * Returns: - > 0 if control has to be given to guest 2
 *          - < 0 if an error occurred during injection.
 */
static int inject_fault(struct kvm_vcpu *vcpu, __u16 code, __u64 vaddr,
			bool write_flag)
{
	struct kvm_s390_pgm_info pgm = {
		.code = code,
		.trans_exc_code =
			/* 0-51: virtual address */
			(vaddr & 0xfffffffffffff000UL) |
			/* 52-53: store / fetch */
			(((unsigned int) !write_flag) + 1) << 10,
			/* 62-63: asce id (always primary == 0) */
		.exc_access_id = 0, /* always primary */
		.op_access_id = 0, /* not MVPG */
	};
	int rc;

	if (code == PGM_PROTECTION)
		pgm.trans_exc_code |= 0x4UL;

	rc = kvm_s390_inject_prog_irq(vcpu, &pgm);
	return rc ? rc : 1;
}

/*
 * Handle a fault during vsie execution on a gmap shadow.
 *
 * Returns: - 0 if the fault was resolved
 *          - > 0 if control has to be given to guest 2
 *          - < 0 if an error occurred
 */
static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	int rc;

	if (current->thread.gmap_int_code == PGM_PROTECTION)
		/* we can directly forward all protection exceptions */
		return inject_fault(vcpu, PGM_PROTECTION,
				    current->thread.gmap_addr, 1);

	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
				   current->thread.gmap_addr);
	if (rc > 0) {
		rc = inject_fault(vcpu, rc,
				  current->thread.gmap_addr,
				  current->thread.gmap_write_flag);
	}
	return rc;
}

static inline void clear_vsie_icpt(struct vsie_page *vsie_page)
{
	vsie_page->scb_s.icptcode = 0;
}

/*
 * Run the vsie on a shadow scb and a shadow gmap, without any further
 * sanity checks, handling SIE faults.
 *
 * Returns: - 0 everything went fine
 *          - > 0 if control has to be given to guest 2
 *          - < 0 if an error occurred
 */
static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	int rc;

	if (need_resched())
		schedule();
	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	local_irq_disable();
	kvm_guest_enter();
	local_irq_enable();

	rc = sie64a(scb_s, vcpu->run->s.regs.gprs);

	local_irq_disable();
	kvm_guest_exit();
	local_irq_enable();
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	if (rc > 0)
		rc = 0; /* we could still have an icpt */
	else if (rc == -EFAULT)
		return handle_fault(vcpu, vsie_page);

	switch (scb_s->icptcode) {
	case ICPT_STOP:
		/* stop not requested by g2 - must have been a kick */
		if (!(atomic_read(&scb_o->cpuflags) & CPUSTAT_STOP_INT))
			clear_vsie_icpt(vsie_page);
		break;
	case ICPT_VALIDITY:
		if ((scb_s->ipa & 0xf000) != 0xf000)
			scb_s->ipa += 0x1000;
		break;
	}
	return rc;
}

static void release_gmap_shadow(struct vsie_page *vsie_page)
{
	if (vsie_page->gmap)
		gmap_put(vsie_page->gmap);
	WRITE_ONCE(vsie_page->gmap, NULL);
	prefix_unmapped(vsie_page);
}

static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
			       struct vsie_page *vsie_page)
{
	unsigned long asce;
	union ctlreg0 cr0;
	struct gmap *gmap;
	int edat;

	asce = vcpu->arch.sie_block->gcr[1];
	cr0.val = vcpu->arch.sie_block->gcr[0];
	edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
	edat += edat && test_kvm_facility(vcpu->kvm, 78);

	/*
	 * ASCE or EDAT could have changed since last icpt, or the gmap
	 * we're holding has been unshadowed. If the gmap is still valid,
	 * we can safely reuse it.
	 */
	if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat))
		return 0;

	/* release the old shadow - if any, and mark the prefix as unmapped */
	release_gmap_shadow(vsie_page);
	gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
	if (IS_ERR(gmap))
		return PTR_ERR(gmap);
	gmap->private = vcpu->kvm;
	WRITE_ONCE(vsie_page->gmap, gmap);
	return 0;
}
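
/*
 * Note: edat ends up as 0, 1 or 2 here. Facility 8 is enhanced DAT (EDAT-1)
 * and facility 78 is enhanced DAT 2 (EDAT-2); the level, together with the
 * guest ASCE, determines whether an existing shadow gmap can be reused.
 */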

/*
 * Run the vsie on a shadowed scb, managing the gmap shadow, handling
 * prefix pages and faults.
 *
 * Returns: - 0 if no errors occurred
 *          - > 0 if control has to be given to guest 2
 *          - -ENOMEM if out of memory
 */
static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	int rc = 0;

	while (1) {
		rc = acquire_gmap_shadow(vcpu, vsie_page);
		if (!rc)
			rc = map_prefix(vcpu, vsie_page);
		if (!rc) {
			gmap_enable(vsie_page->gmap);
			update_intervention_requests(vsie_page);
			rc = do_vsie_run(vcpu, vsie_page);
			gmap_enable(vcpu->arch.gmap);
		}

		if (rc == -EAGAIN)
			rc = 0;
		if (rc || scb_s->icptcode || signal_pending(current) ||
		    kvm_s390_vcpu_has_irq(vcpu, 0))
			break;
	}

	if (rc == -EFAULT) {
		/*
		 * Addressing exceptions are always presented as intercepts.
		 * As addressing exceptions are suppressing and our guest 3 PSW
		 * points at the responsible instruction, we have to
		 * forward the PSW and set the ilc. If we can't read the guest 3
		 * instruction, we can use an arbitrary ilc. Let's always use
		 * ilen = 4 for now, so we can avoid reading in guest 3 virtual
		 * memory. (we could also fake the shadow so the hardware
		 * handles it).
		 */
		scb_s->icptcode = ICPT_PROGI;
		scb_s->iprcc = PGM_ADDRESSING;
		scb_s->pgmilc = 4;
		scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4);
	}
	return rc;
}

/*
 * Get or create a vsie page for a scb address.
 *
 * Returns: - address of a vsie page (cached or new one)
 *          - NULL if the same scb address is already used by another VCPU
 *          - ERR_PTR(-ENOMEM) if out of memory
 */
static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
{
	struct vsie_page *vsie_page;
	struct page *page;
	int nr_vcpus;

	rcu_read_lock();
	/* scb addresses are 512 byte aligned, so addr >> 9 is a unique key */
	page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
	rcu_read_unlock();
	if (page) {
		if (page_ref_inc_return(page) == 2)
			return page_to_virt(page);
		page_ref_dec(page);
	}

	/*
	 * We want at least #online_vcpus shadows, so every VCPU can execute
	 * the VSIE in parallel.
	 */
	nr_vcpus = atomic_read(&kvm->online_vcpus);

	mutex_lock(&kvm->arch.vsie.mutex);
	if (kvm->arch.vsie.page_count < nr_vcpus) {
		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!page) {
			mutex_unlock(&kvm->arch.vsie.mutex);
			return ERR_PTR(-ENOMEM);
		}
		page_ref_inc(page);
		kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page;
		kvm->arch.vsie.page_count++;
	} else {
		/* reuse an existing entry that belongs to nobody */
		while (true) {
			page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
			if (page_ref_inc_return(page) == 2)
				break;
			page_ref_dec(page);
			kvm->arch.vsie.next++;
			kvm->arch.vsie.next %= nr_vcpus;
		}
		radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
	}
	page->index = addr;
	/* double use of the same address */
	if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) {
		page_ref_dec(page);
		mutex_unlock(&kvm->arch.vsie.mutex);
		return NULL;
	}
	mutex_unlock(&kvm->arch.vsie.mutex);

	vsie_page = page_to_virt(page);
	memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
	release_gmap_shadow(vsie_page);
	vsie_page->scb_s.ihcpu = 0xffffU;
	return vsie_page;
}
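
/*
 * Note: the backing struct page's refcount doubles as a try-lock. A cached
 * but unused vsie page has a refcount of 1, so page_ref_inc_return() == 2
 * means we are the only user; otherwise another VCPU currently owns it and
 * we back off (or, on the reuse path, try the next slot).
 */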

/* put a vsie page acquired via get_vsie_page */
static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page)
{
	struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT);

	page_ref_dec(page);
}

int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
{
	struct vsie_page *vsie_page;
	unsigned long scb_addr;
	int rc;

	vcpu->stat.instruction_sie++;
	if (!test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIEF2))
		return -EOPNOTSUPP;
	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);

	BUILD_BUG_ON(sizeof(struct vsie_page) != 4096);
	scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL);

	/* 512 byte alignment */
	if (unlikely(scb_addr & 0x1ffUL))
		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);

	if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;

	vsie_page = get_vsie_page(vcpu->kvm, scb_addr);
	if (IS_ERR(vsie_page))
		return PTR_ERR(vsie_page);
	else if (!vsie_page)
		/* double use of sie control block - simply do nothing */
		return 0;

	rc = pin_scb(vcpu, vsie_page, scb_addr);
	if (rc)
		goto out_put;
	rc = shadow_scb(vcpu, vsie_page);
	if (rc)
		goto out_unpin_scb;
	rc = pin_blocks(vcpu, vsie_page);
	if (rc)
		goto out_unshadow;
	rc = vsie_run(vcpu, vsie_page);
	unpin_blocks(vcpu, vsie_page);
out_unshadow:
	unshadow_scb(vcpu, vsie_page);
out_unpin_scb:
	unpin_scb(vcpu, vsie_page, scb_addr);
out_put:
	put_vsie_page(vcpu->kvm, vsie_page);

	return rc < 0 ? rc : 0;
}
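
/*
 * Note: the teardown above mirrors the setup order: unpin the forwarded
 * blocks, copy the shadow scb state back via unshadow_scb(), unpin the
 * guest 2 scb and finally drop our reference to the vsie page.
 */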

/* Init the vsie data structures. To be called when a vm is initialized. */
void kvm_s390_vsie_init(struct kvm *kvm)
{
	mutex_init(&kvm->arch.vsie.mutex);
	INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL);
}

/* Destroy the vsie data structures. To be called when a vm is destroyed. */
void kvm_s390_vsie_destroy(struct kvm *kvm)
{
	struct vsie_page *vsie_page;
	struct page *page;
	int i;

	mutex_lock(&kvm->arch.vsie.mutex);
	for (i = 0; i < kvm->arch.vsie.page_count; i++) {
		page = kvm->arch.vsie.pages[i];
		kvm->arch.vsie.pages[i] = NULL;
		vsie_page = page_to_virt(page);
		release_gmap_shadow(vsie_page);
		/* free the radix tree entry */
		radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
		__free_page(page);
	}
	kvm->arch.vsie.page_count = 0;
	mutex_unlock(&kvm->arch.vsie.mutex);
}