RDMA/ocrdma: Do proper cleanup even if FW is in error state
If any mailbox command reports timeout, save the state in the driver,
to prevent issuing any more commands to the HW. Do proper cleanup
even if FW is in error state.
Signed-off-by: Mitesh Ahuja <mitesh.ahuja@emulex.Com>
Signed-off-by: Selvin Xavier <selvin.xavier@emulex.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index 5cd65c2..fc27378 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -137,6 +137,7 @@
u16 cqe_status;
u16 ext_status;
bool cmd_done;
+ bool fw_error_state;
};
struct ocrdma_hw_mr {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 55308b6..5b6e9d9c7 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -966,8 +966,12 @@
msecs_to_jiffies(30000));
if (status)
return 0;
- else
+ else {
+ dev->mqe_ctx.fw_error_state = true;
+ pr_err("%s(%d) mailbox timeout: fw not responding\n",
+ __func__, dev->id);
return -1;
+ }
}
/* issue a mailbox command on the MQ */
@@ -979,6 +983,8 @@
struct ocrdma_mbx_rsp *rsp = NULL;
mutex_lock(&dev->mqe_ctx.lock);
+ if (dev->mqe_ctx.fw_error_state)
+ goto mbx_err;
ocrdma_post_mqe(dev, mqe);
status = ocrdma_wait_mqe_cmpl(dev);
if (status)
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 7f54d24..8cd16a1 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -329,7 +329,10 @@
struct ocrdma_pd *pd = uctx->cntxt_pd;
struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
- BUG_ON(uctx->pd_in_use);
+ if (uctx->pd_in_use) {
+ pr_err("%s(%d) Freeing in use pdid=0x%x.\n",
+ __func__, dev->id, pd->id);
+ }
uctx->cntxt_pd = NULL;
status = _ocrdma_dealloc_pd(dev, pd);
return status;
@@ -844,6 +847,13 @@
if (mr->umem)
ib_umem_release(mr->umem);
kfree(mr);
+
+ /* Don't stop cleanup, in case FW is unresponsive */
+ if (dev->mqe_ctx.fw_error_state) {
+ status = 0;
+ pr_err("%s(%d) fw not responding.\n",
+ __func__, dev->id);
+ }
return status;
}