cxl: Mask slice error interrupts after first occurrence
In some situations, a faulty AFU slice may create an interrupt storm of
slice errors, rendering the machine unusable. Since these interrupts are
informational only, report the first occurrence, then mask the interrupt
off so that it cannot be retriggered until the AFU is reset.
Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Reviewed-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
Acked-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 194c58e..871a2f0 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -95,12 +95,23 @@
/* This will disable as well as reset */
static int native_afu_reset(struct cxl_afu *afu)
{
+ int rc;
+ u64 serr;
+
pr_devel("AFU reset request\n");
- return afu_control(afu, CXL_AFU_Cntl_An_RA, 0,
+ rc = afu_control(afu, CXL_AFU_Cntl_An_RA, 0,
CXL_AFU_Cntl_An_RS_Complete | CXL_AFU_Cntl_An_ES_Disabled,
CXL_AFU_Cntl_An_RS_MASK | CXL_AFU_Cntl_An_ES_MASK,
false);
+
+ /* Re-enable any masked interrupts */
+ serr = cxl_p1n_read(afu, CXL_PSL_SERR_An);
+ serr &= ~CXL_PSL_SERR_An_IRQ_MASKS;
+ cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
+
+
+ return rc;
}
static int native_afu_check_and_enable(struct cxl_afu *afu)
@@ -1205,7 +1216,7 @@
{
struct cxl_afu *afu = data;
u64 errstat, serr, afu_error, dsisr;
- u64 fir_slice, afu_debug;
+ u64 fir_slice, afu_debug, irq_mask;
/*
* slice err interrupt is only used with full PSL (no XSL)
@@ -1226,7 +1237,11 @@
dev_crit(&afu->dev, "AFU_ERR_An: 0x%.16llx\n", afu_error);
dev_crit(&afu->dev, "PSL_DSISR_An: 0x%.16llx\n", dsisr);
+ /* mask off the IRQ so it won't retrigger until the AFU is reset */
+ irq_mask = (serr & CXL_PSL_SERR_An_IRQS) >> 32;
+ serr |= irq_mask;
cxl_p1n_write(afu, CXL_PSL_SERR_An, serr);
+ dev_info(&afu->dev, "Further such interrupts will be masked until the AFU is reset\n");
return IRQ_HANDLED;
}