RDMA/cma: Add locking around QP accesses
If a user allocates a QP on an rdma_cm_id, the rdma_cm will automatically
transition the QP through its states (RTR, RTS, error, etc.) While the
QP state transitions are occurring, the QP itself must remain valid.
Provide locking around the QP pointer to prevent its destruction while
accessing the pointer.
This fixes an issue reported by Olaf Kirch from Oracle that resulted in
a system crash:
"An incoming connection arrives and we decide to tear down the nascent
connection. The remote ends decides to do the same. We start to shut
down the connection, and call rdma_destroy_qp on our cm_id. ... Now
apparently a 'connect reject' message comes in from the other host,
and cma_ib_handler() is called with an event of IB_CM_REJ_RECEIVED.
It calls cma_modify_qp_err, which for some odd reason tries to modify
the exact same QP we just destroyed."
Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 93644f8..01ae052 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -121,6 +121,8 @@
enum cma_state state;
spinlock_t lock;
+ struct mutex qp_mutex;
+
struct completion comp;
atomic_t refcount;
wait_queue_head_t wait_remove;
@@ -389,6 +391,7 @@
id_priv->id.event_handler = event_handler;
id_priv->id.ps = ps;
spin_lock_init(&id_priv->lock);
+ mutex_init(&id_priv->qp_mutex);
init_completion(&id_priv->comp);
atomic_set(&id_priv->refcount, 1);
init_waitqueue_head(&id_priv->wait_remove);
@@ -474,61 +477,86 @@
void rdma_destroy_qp(struct rdma_cm_id *id)
{
- ib_destroy_qp(id->qp);
+ struct rdma_id_private *id_priv;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ mutex_lock(&id_priv->qp_mutex);
+ ib_destroy_qp(id_priv->id.qp);
+ id_priv->id.qp = NULL;
+ mutex_unlock(&id_priv->qp_mutex);
}
EXPORT_SYMBOL(rdma_destroy_qp);
-static int cma_modify_qp_rtr(struct rdma_cm_id *id)
+static int cma_modify_qp_rtr(struct rdma_id_private *id_priv)
{
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
- if (!id->qp)
- return 0;
+ mutex_lock(&id_priv->qp_mutex);
+ if (!id_priv->id.qp) {
+ ret = 0;
+ goto out;
+ }
/* Need to update QP attributes from default values. */
qp_attr.qp_state = IB_QPS_INIT;
- ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+ ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
if (ret)
- return ret;
+ goto out;
- ret = ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+ ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
if (ret)
- return ret;
+ goto out;
qp_attr.qp_state = IB_QPS_RTR;
- ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+ ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
if (ret)
- return ret;
+ goto out;
- return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+ ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
+out:
+ mutex_unlock(&id_priv->qp_mutex);
+ return ret;
}
-static int cma_modify_qp_rts(struct rdma_cm_id *id)
+static int cma_modify_qp_rts(struct rdma_id_private *id_priv)
{
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
- if (!id->qp)
- return 0;
+ mutex_lock(&id_priv->qp_mutex);
+ if (!id_priv->id.qp) {
+ ret = 0;
+ goto out;
+ }
qp_attr.qp_state = IB_QPS_RTS;
- ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
+ ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
if (ret)
- return ret;
+ goto out;
- return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
+ ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
+out:
+ mutex_unlock(&id_priv->qp_mutex);
+ return ret;
}
-static int cma_modify_qp_err(struct rdma_cm_id *id)
+static int cma_modify_qp_err(struct rdma_id_private *id_priv)
{
struct ib_qp_attr qp_attr;
+ int ret;
- if (!id->qp)
- return 0;
+ mutex_lock(&id_priv->qp_mutex);
+ if (!id_priv->id.qp) {
+ ret = 0;
+ goto out;
+ }
qp_attr.qp_state = IB_QPS_ERR;
- return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE);
+ ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
+out:
+ mutex_unlock(&id_priv->qp_mutex);
+ return ret;
}
static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
@@ -857,11 +885,11 @@
{
int ret;
- ret = cma_modify_qp_rtr(&id_priv->id);
+ ret = cma_modify_qp_rtr(id_priv);
if (ret)
goto reject;
- ret = cma_modify_qp_rts(&id_priv->id);
+ ret = cma_modify_qp_rts(id_priv);
if (ret)
goto reject;
@@ -871,7 +899,7 @@
return 0;
reject:
- cma_modify_qp_err(&id_priv->id);
+ cma_modify_qp_err(id_priv);
ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
NULL, 0, NULL, 0);
return ret;
@@ -947,7 +975,7 @@
/* ignore event */
goto out;
case IB_CM_REJ_RECEIVED:
- cma_modify_qp_err(&id_priv->id);
+ cma_modify_qp_err(id_priv);
event.status = ib_event->param.rej_rcvd.reason;
event.event = RDMA_CM_EVENT_REJECTED;
event.param.conn.private_data = ib_event->private_data;
@@ -2264,7 +2292,7 @@
sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
cm_id->remote_addr = *sin;
- ret = cma_modify_qp_rtr(&id_priv->id);
+ ret = cma_modify_qp_rtr(id_priv);
if (ret)
goto out;
@@ -2331,7 +2359,7 @@
int qp_attr_mask, ret;
if (id_priv->id.qp) {
- ret = cma_modify_qp_rtr(&id_priv->id);
+ ret = cma_modify_qp_rtr(id_priv);
if (ret)
goto out;
@@ -2370,7 +2398,7 @@
struct iw_cm_conn_param iw_param;
int ret;
- ret = cma_modify_qp_rtr(&id_priv->id);
+ ret = cma_modify_qp_rtr(id_priv);
if (ret)
return ret;
@@ -2442,7 +2470,7 @@
return 0;
reject:
- cma_modify_qp_err(id);
+ cma_modify_qp_err(id_priv);
rdma_reject(id, NULL, 0);
return ret;
}
@@ -2512,7 +2540,7 @@
switch (rdma_node_get_transport(id->device->node_type)) {
case RDMA_TRANSPORT_IB:
- ret = cma_modify_qp_err(id);
+ ret = cma_modify_qp_err(id_priv);
if (ret)
goto out;
/* Initiate or respond to a disconnect. */
@@ -2543,9 +2571,11 @@
cma_disable_remove(id_priv, CMA_ADDR_RESOLVED))
return 0;
+ mutex_lock(&id_priv->qp_mutex);
if (!status && id_priv->id.qp)
status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
multicast->rec.mlid);
+ mutex_unlock(&id_priv->qp_mutex);
memset(&event, 0, sizeof event);
event.status = status;