/*
 * Copyright 2016,2017 IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) "xive: " fmt

#include <linux/types.h>
#include <linux/irq.h>
#include <linux/debugfs.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/cpumask.h>
#include <linux/mm.h>

#include <asm/prom.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/irq.h>
#include <asm/errno.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/opal.h>

#include "xive-internal.h"


static u32 xive_provision_size;
static u32 *xive_provision_chips;
static u32 xive_provision_chip_count;
static u32 xive_queue_shift;
static u32 xive_pool_vps = XIVE_INVALID_VP;
static struct kmem_cache *xive_provision_cache;

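/*
 * Query OPAL for the ESB characteristics of a HW interrupt and map
 * its EOI page (and, when distinct, its trigger page) for use by the
 * core XIVE code.
 */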
int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
{
        __be64 flags, eoi_page, trig_page;
        __be32 esb_shift, src_chip;
        u64 opal_flags;
        s64 rc;

        memset(data, 0, sizeof(*data));

        rc = opal_xive_get_irq_info(hw_irq, &flags, &eoi_page, &trig_page,
                                    &esb_shift, &src_chip);
        if (rc) {
                pr_err("opal_xive_get_irq_info(0x%x) returned %lld\n",
                       hw_irq, rc);
                return -EINVAL;
        }

        opal_flags = be64_to_cpu(flags);
        if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI)
                data->flags |= XIVE_IRQ_FLAG_STORE_EOI;
        if (opal_flags & OPAL_XIVE_IRQ_LSI)
                data->flags |= XIVE_IRQ_FLAG_LSI;
        if (opal_flags & OPAL_XIVE_IRQ_SHIFT_BUG)
                data->flags |= XIVE_IRQ_FLAG_SHIFT_BUG;
        if (opal_flags & OPAL_XIVE_IRQ_MASK_VIA_FW)
                data->flags |= XIVE_IRQ_FLAG_MASK_FW;
        if (opal_flags & OPAL_XIVE_IRQ_EOI_VIA_FW)
                data->flags |= XIVE_IRQ_FLAG_EOI_FW;
        data->eoi_page = be64_to_cpu(eoi_page);
        data->trig_page = be64_to_cpu(trig_page);
        data->esb_shift = be32_to_cpu(esb_shift);
        data->src_chip = be32_to_cpu(src_chip);

        data->eoi_mmio = ioremap(data->eoi_page, 1u << data->esb_shift);
        if (!data->eoi_mmio) {
                pr_err("Failed to map EOI page for irq 0x%x\n", hw_irq);
                return -ENOMEM;
        }

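        /*
         * Some sources have no separate trigger page, or share a single
         * page for both EOI and trigger; only map a distinct trigger page
         * when OPAL reports one.
         */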
        if (!data->trig_page)
                return 0;
        if (data->trig_page == data->eoi_page) {
                data->trig_mmio = data->eoi_mmio;
                return 0;
        }

        data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift);
        if (!data->trig_mmio) {
                pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq);
                return -ENOMEM;
        }
        return 0;
}

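/*
 * OPAL calls that reconfigure the XIVE can return OPAL_BUSY while the
 * hardware completes a previous operation, hence the retry loops with a
 * short sleep used throughout this file.
 */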
int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
{
        s64 rc;

        for (;;) {
                rc = opal_xive_set_irq_config(hw_irq, target, prio, sw_irq);
                if (rc != OPAL_BUSY)
                        break;
                msleep(1);
        }
        return rc == 0 ? 0 : -ENXIO;
}

/* This can be called multiple times to change a queue configuration */
int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
                                __be32 *qpage, u32 order, bool can_escalate)
{
        s64 rc = 0;
        __be64 qeoi_page_be;
        __be32 esc_irq_be;
        u64 flags, qpage_phys;

        /* If there's an actual queue page, clean it */
        if (order) {
                if (WARN_ON(!qpage))
                        return -EINVAL;
                qpage_phys = __pa(qpage);
        } else
                qpage_phys = 0;

        /* Initialize the rest of the fields */
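        /*
         * Queue entries are 4 bytes (__be32), so a queue of 2^order bytes
         * holds 2^(order - 2) entries; msk is used to wrap the index.
         */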
        q->msk = order ? ((1u << (order - 2)) - 1) : 0;
        q->idx = 0;
        q->toggle = 0;

        rc = opal_xive_get_queue_info(vp_id, prio, NULL, NULL,
                                      &qeoi_page_be,
                                      &esc_irq_be,
                                      NULL);
        if (rc) {
                pr_err("Error %lld getting queue info prio %d\n", rc, prio);
                rc = -EIO;
                goto fail;
        }
        q->eoi_phys = be64_to_cpu(qeoi_page_be);

        /* Default flags */
        flags = OPAL_XIVE_EQ_ALWAYS_NOTIFY | OPAL_XIVE_EQ_ENABLED;

        /* Escalation needed ? */
        if (can_escalate) {
                q->esc_irq = be32_to_cpu(esc_irq_be);
                flags |= OPAL_XIVE_EQ_ESCALATE;
        }

        /* Configure and enable the queue in HW */
        for (;;) {
                rc = opal_xive_set_queue_info(vp_id, prio, qpage_phys, order, flags);
                if (rc != OPAL_BUSY)
                        break;
                msleep(1);
        }
        if (rc) {
                pr_err("Error %lld setting queue for prio %d\n", rc, prio);
                rc = -EIO;
        } else {
                /*
                 * KVM code requires all of the above to be visible before
                 * q->qpage is set due to how it manages IPI EOIs
                 */
                wmb();
                q->qpage = qpage;
        }
fail:
        return rc;
}

static void __xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
{
        s64 rc;

        /* Disable the queue in HW */
        for (;;) {
                rc = opal_xive_set_queue_info(vp_id, prio, 0, 0, 0);
                if (rc != OPAL_BUSY)
                        break;
                msleep(1);
        }
        if (rc)
                pr_err("Error %lld disabling queue for prio %d\n", rc, prio);
}

void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
{
        __xive_native_disable_queue(vp_id, q, prio);
}

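/*
 * Allocate a per-CPU queue page (at least one full page, even when the
 * configured queue size is smaller) and register it with the HW.
 */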
static int xive_native_setup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio)
{
        struct xive_q *q = &xc->queue[prio];
        unsigned int alloc_order;
        struct page *pages;
        __be32 *qpage;

        alloc_order = (xive_queue_shift > PAGE_SHIFT) ?
                (xive_queue_shift - PAGE_SHIFT) : 0;
        pages = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, alloc_order);
        if (!pages)
                return -ENOMEM;
        qpage = (__be32 *)page_address(pages);
        memset(qpage, 0, 1 << xive_queue_shift);
        return xive_native_configure_queue(get_hard_smp_processor_id(cpu),
                                           q, prio, qpage, xive_queue_shift, false);
}

static void xive_native_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio)
{
        struct xive_q *q = &xc->queue[prio];
        unsigned int alloc_order;

        /*
         * We use the variant with no iounmap as this is called on exec
         * from an IPI and iounmap isn't safe
         */
        __xive_native_disable_queue(get_hard_smp_processor_id(cpu), q, prio);
        alloc_order = (xive_queue_shift > PAGE_SHIFT) ?
                (xive_queue_shift - PAGE_SHIFT) : 0;
        free_pages((unsigned long)q->qpage, alloc_order);
        q->qpage = NULL;
}

static bool xive_native_match(struct device_node *node)
{
        return of_device_is_compatible(node, "ibm,opal-xive-vc");
}

#ifdef CONFIG_SMP
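/*
 * Allocate the per-CPU IPI from the interrupt pool of the chip the CPU
 * sits on, falling back to chip 0 when the device-tree carries no
 * "ibm,chip-id" property.
 */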
static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
{
        struct device_node *np;
        unsigned int chip_id = 0;
        s64 irq;

        /* Find the chip ID */
        np = of_get_cpu_node(cpu, NULL);
        if (np) {
                if (of_property_read_u32(np, "ibm,chip-id", &chip_id) < 0)
                        chip_id = 0;
        }

        /* Allocate an IPI and populate info about it */
        for (;;) {
                irq = opal_xive_allocate_irq(chip_id);
                if (irq == OPAL_BUSY) {
                        msleep(1);
                        continue;
                }
                if (irq < 0) {
                        pr_err("Failed to allocate IPI on CPU %d\n", cpu);
                        return -ENXIO;
                }
                xc->hw_ipi = irq;
                break;
        }
        return 0;
}

u32 xive_native_alloc_irq(void)
{
        s64 rc;

        for (;;) {
                rc = opal_xive_allocate_irq(OPAL_XIVE_ANY_CHIP);
                if (rc != OPAL_BUSY)
                        break;
                msleep(1);
        }
        if (rc < 0)
                return 0;
        return rc;
}

void xive_native_free_irq(u32 irq)
{
        for (;;) {
                s64 rc = opal_xive_free_irq(irq);
                if (rc != OPAL_BUSY)
                        break;
                msleep(1);
        }
}

static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc)
{
        s64 rc;

        /* Free the IPI */
        if (!xc->hw_ipi)
                return;
        for (;;) {
                rc = opal_xive_free_irq(xc->hw_ipi);
                if (rc == OPAL_BUSY) {
                        msleep(1);
                        continue;
                }
                xc->hw_ipi = 0;
                break;
        }
}
#endif /* CONFIG_SMP */

static void xive_native_shutdown(void)
{
        /* Switch the XIVE to emulation mode */
        opal_xive_reset(OPAL_XIVE_MODE_EMU);
}

/*
 * Perform an "ack" cycle on the current thread, thus
 * grabbing the pending active priorities and updating
 * the CPPR to the most favored one.
 */
static void xive_native_update_pending(struct xive_cpu *xc)
{
        u8 he, cppr;
        u16 ack;

        /* Perform the acknowledge hypervisor to register cycle */
        ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_HV_REG));

        /* Synchronize subsequent queue accesses */
        mb();

        /*
         * Grab the CPPR and the "HE" field which indicates the source
         * of the hypervisor interrupt (if any)
         */
        cppr = ack & 0xff;
        he = GETFIELD(TM_QW3_NSR_HE, (ack >> 8));
        switch (he) {
        case TM_QW3_NSR_HE_NONE: /* Nothing to see here */
                break;
        case TM_QW3_NSR_HE_PHYS: /* Physical thread interrupt */
                if (cppr == 0xff)
                        return;
                /* Mark the priority pending */
                xc->pending_prio |= 1 << cppr;

                /*
                 * A new interrupt should never have a CPPR less favored
                 * than our current one.
                 */
                if (cppr >= xc->cppr)
                        pr_err("CPU %d odd ack CPPR, got %d at %d\n",
                               smp_processor_id(), cppr, xc->cppr);

                /* Update our idea of what the CPPR is */
                xc->cppr = cppr;
                break;
        case TM_QW3_NSR_HE_POOL: /* HV Pool interrupt (unused) */
        case TM_QW3_NSR_HE_LSI:  /* Legacy FW LSI (unused) */
                pr_err("CPU %d got unexpected interrupt type HE=%d\n",
                       smp_processor_id(), he);
                return;
        }
}

static void xive_native_eoi(u32 hw_irq)
{
        /*
         * Not normally used except if specific interrupts need
         * a workaround on EOI.
         */
        opal_int_eoi(hw_irq);
}

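/*
 * Enable this CPU's pool VP and push its CAM line into the thread
 * management area so that pool interrupts can be delivered to it.
 */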
static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
{
        s64 rc;
        u32 vp;
        __be64 vp_cam_be;
        u64 vp_cam;

        if (xive_pool_vps == XIVE_INVALID_VP)
                return;

        /* Enable the pool VP */
        vp = xive_pool_vps + get_hard_smp_processor_id(cpu);
        pr_debug("CPU %d setting up pool VP 0x%x\n", cpu, vp);
        for (;;) {
                rc = opal_xive_set_vp_info(vp, OPAL_XIVE_VP_ENABLED, 0);
                if (rc != OPAL_BUSY)
                        break;
                msleep(1);
        }
        if (rc) {
                pr_err("Failed to enable pool VP on CPU %d\n", cpu);
                return;
        }

        /* Grab its CAM value */
        rc = opal_xive_get_vp_info(vp, NULL, &vp_cam_be, NULL, NULL);
        if (rc) {
                pr_err("Failed to get pool VP info CPU %d\n", cpu);
                return;
        }
        vp_cam = be64_to_cpu(vp_cam_be);

        pr_debug("VP CAM = %llx\n", vp_cam);

        /* Push it on the CPU (set LSMFB to 0xff to skip backlog scan) */
        pr_debug("(Old HW value: %08x)\n",
                 in_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2));
        out_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD0, 0xff);
        out_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2,
                 TM_QW2W2_VP | vp_cam);
        pr_debug("(New HW value: %08x)\n",
                 in_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2));
}

static void xive_native_teardown_cpu(unsigned int cpu, struct xive_cpu *xc)
{
        s64 rc;
        u32 vp;

        if (xive_pool_vps == XIVE_INVALID_VP)
                return;

        /* Pull the pool VP from the CPU */
        in_be64(xive_tima + TM_SPC_PULL_POOL_CTX);

        /* Disable it */
        vp = xive_pool_vps + get_hard_smp_processor_id(cpu);
        for (;;) {
                rc = opal_xive_set_vp_info(vp, 0, 0);
                if (rc != OPAL_BUSY)
                        break;
                msleep(1);
        }
}

static void xive_native_sync_source(u32 hw_irq)
{
        opal_xive_sync(XIVE_SYNC_EAS, hw_irq);
}

static const struct xive_ops xive_native_ops = {
        .populate_irq_data      = xive_native_populate_irq_data,
        .configure_irq          = xive_native_configure_irq,
        .setup_queue            = xive_native_setup_queue,
        .cleanup_queue          = xive_native_cleanup_queue,
        .match                  = xive_native_match,
        .shutdown               = xive_native_shutdown,
        .update_pending         = xive_native_update_pending,
        .eoi                    = xive_native_eoi,
        .setup_cpu              = xive_native_setup_cpu,
        .teardown_cpu           = xive_native_teardown_cpu,
        .sync_source            = xive_native_sync_source,
#ifdef CONFIG_SMP
        .get_ipi                = xive_native_get_ipi,
        .put_ipi                = xive_native_put_ipi,
#endif /* CONFIG_SMP */
        .name                   = "native",
};

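/*
 * Parse the optional provisioning properties: when they are present,
 * OPAL may ask for extra pages (of "ibm,xive-provision-page-size" bytes)
 * from the listed chips while VP blocks are being allocated, so set up
 * a kmem cache to back those requests.
 */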
static bool xive_parse_provisioning(struct device_node *np)
{
        int rc;

        if (of_property_read_u32(np, "ibm,xive-provision-page-size",
                                 &xive_provision_size) < 0)
                return true;
        rc = of_property_count_elems_of_size(np, "ibm,xive-provision-chips", 4);
        if (rc < 0) {
                pr_err("Error %d getting provision chips array\n", rc);
                return false;
        }
        xive_provision_chip_count = rc;
        if (rc == 0)
                return true;

        xive_provision_chips = kzalloc(4 * xive_provision_chip_count,
                                       GFP_KERNEL);
        if (WARN_ON(!xive_provision_chips))
                return false;

        rc = of_property_read_u32_array(np, "ibm,xive-provision-chips",
                                        xive_provision_chips,
                                        xive_provision_chip_count);
        if (rc < 0) {
                pr_err("Error %d reading provision chips array\n", rc);
                return false;
        }

        xive_provision_cache = kmem_cache_create("xive-provision",
                                                 xive_provision_size,
                                                 xive_provision_size,
                                                 0, NULL);
        if (!xive_provision_cache) {
                pr_err("Failed to allocate provision cache\n");
                return false;
        }
        return true;
}

u32 xive_native_default_eq_shift(void)
{
        return xive_queue_shift;
}

bool xive_native_init(void)
{
        struct device_node *np;
        struct resource r;
        void __iomem *tima;
        struct property *prop;
        u8 max_prio = 7;
        const __be32 *p;
        u32 val;
        s64 rc;

        if (xive_cmdline_disabled)
                return false;

        pr_devel("xive_native_init()\n");
        np = of_find_compatible_node(NULL, NULL, "ibm,opal-xive-pe");
        if (!np) {
                pr_devel("not found !\n");
                return false;
        }
        pr_devel("Found %s\n", np->full_name);

        /* Resource 1 is HV window */
        if (of_address_to_resource(np, 1, &r)) {
                pr_err("Failed to get thread mgmnt area resource\n");
                return false;
        }
        tima = ioremap(r.start, resource_size(&r));
        if (!tima) {
                pr_err("Failed to map thread mgmnt area\n");
                return false;
        }

        /* Read number of priorities */
        if (of_property_read_u32(np, "ibm,xive-#priorities", &val) == 0)
                max_prio = val - 1;

        /* Iterate the EQ sizes and pick one */
        of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, p, val) {
                xive_queue_shift = val;
                if (val == PAGE_SHIFT)
                        break;
        }

        /* Grab size of provisioning pages */
        xive_parse_provisioning(np);

        /* Switch the XIVE to exploitation mode */
        rc = opal_xive_reset(OPAL_XIVE_MODE_EXPL);
        if (rc) {
                pr_err("Switch to exploitation mode failed with error %lld\n", rc);
                return false;
        }

        /* Initialize XIVE core with our backend */
        if (!xive_core_init(&xive_native_ops, tima, TM_QW3_HV_PHYS,
                            max_prio)) {
                opal_xive_reset(OPAL_XIVE_MODE_EMU);
                return false;
        }
        pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10));
        return true;
}

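/*
 * Called when opal_xive_alloc_vp_block() returns OPAL_XIVE_PROVISIONING:
 * donate one page from the provisioning cache to each chip listed in the
 * device-tree so the allocation can be retried.
 */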
static bool xive_native_provision_pages(void)
{
        u32 i;
        void *p;

        for (i = 0; i < xive_provision_chip_count; i++) {
                u32 chip = xive_provision_chips[i];

                /*
                 * XXX TODO: Try to make the allocation local to the node where
                 * the chip resides.
                 */
                p = kmem_cache_alloc(xive_provision_cache, GFP_KERNEL);
                if (!p) {
                        pr_err("Failed to allocate provisioning page\n");
                        return false;
                }
                opal_xive_donate_page(chip, __pa(p));
        }
        return true;
}

u32 xive_native_alloc_vp_block(u32 max_vcpus)
{
        s64 rc;
        u32 order;

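        /* Round up to the smallest order such that 2^order >= max_vcpus */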
        order = fls(max_vcpus) - 1;
        if (max_vcpus > (1 << order))
                order++;

        pr_info("VP block alloc, for max VCPUs %d use order %d\n",
                max_vcpus, order);

        for (;;) {
                rc = opal_xive_alloc_vp_block(order);
                switch (rc) {
                case OPAL_BUSY:
                        msleep(1);
                        break;
                case OPAL_XIVE_PROVISIONING:
                        if (!xive_native_provision_pages())
                                return XIVE_INVALID_VP;
                        break;
                default:
                        if (rc < 0) {
                                pr_err("OPAL failed to allocate VCPUs order %d, err %lld\n",
                                       order, rc);
                                return XIVE_INVALID_VP;
                        }
                        return rc;
                }
        }
}
EXPORT_SYMBOL_GPL(xive_native_alloc_vp_block);

void xive_native_free_vp_block(u32 vp_base)
{
        s64 rc;

        if (vp_base == XIVE_INVALID_VP)
                return;

        rc = opal_xive_free_vp_block(vp_base);
        if (rc < 0)
                pr_warn("OPAL error %lld freeing VP block\n", rc);
}
EXPORT_SYMBOL_GPL(xive_native_free_vp_block);