iw_cxgb4: add referencing to wait objects

For messages sent from the host to fw that solicit a reply from fw,
the c4iw_wr_wait struct pointer is passed in the host->fw message, and
included in the fw->host fw6_msg reply.  This allows the sender to wait
until the reply is received, and the code processing the ingress reply
to wake up the sender.

If c4iw_wait_for_reply() times out, however, we need to keep the
c4iw_wr_wait object around in case the reply eventually does arrive.
Otherwise we have touch-after-free bugs in the wake_up paths.

This was hit due to a bad kernel driver that blocked ingress processing
of cxgb4 for a long time, causing iw_cxgb4 timeouts, but eventually
resuming ingress processing and thus hitting the touch-after-free bug.

So I want to fix iw_cxgb4 such that we'll at least keep the wait object
around until the reply comes.  If it never comes we leak a small amount of
memory, but if it does come late, we won't potentially crash the system.

So add a kref struct in the c4iw_wr_wait struct, and take a reference
before sending a message to FW that will generate a FW6 reply.  And remove
the reference (and potentially free the wait object) when the reply
is processed.

The ep code also uses the wr_wait for non FW6 CPL messages and doesn't
embed the c4iw_wr_wait object in the message sent to firmware.  So for
those cases we add c4iw_wake_up_noref().

The mr/mw, cq, and qp object create/destroy paths do need this reference
logic.  For these paths, c4iw_ref_send_wait() is introduced to take the
wr_wait reference, send the msg to fw, and then wait for the reply.

So going forward, iw_cxgb4 either uses c4iw_ofld_send(),
c4iw_wait_for_reply() and c4iw_wake_up_noref() like is done in the some
of the endpoint logic, or c4iw_ref_send_wait() and c4iw_wake_up_deref()
(formerly c4iw_wake_up()) when sending messages with the c4iw_wr_wait
object pointer embedded in the message and resulting FW6 reply.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index cec2be5..56655a0 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -353,11 +353,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 	res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr);
 
 	c4iw_init_wr_wait(wr_waitp);
-
-	ret = c4iw_ofld_send(rdev, skb);
-	if (ret)
-		goto free_dma;
-	ret = c4iw_wait_for_reply(rdev, wr_waitp, 0, wq->sq.qid, __func__);
+	ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->sq.qid, __func__);
 	if (ret)
 		goto free_dma;
 
@@ -730,7 +726,7 @@ static void free_qp_work(struct work_struct *work)
 
 	if (ucontext)
 		c4iw_put_ucontext(ucontext);
-	kfree(qhp->wr_waitp);
+	c4iw_put_wr_wait(qhp->wr_waitp);
 	kfree(qhp);
 }
 
@@ -1358,13 +1354,10 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
 	wqe->cookie = (uintptr_t)ep->com.wr_waitp;
 
 	wqe->u.fini.type = FW_RI_TYPE_FINI;
-	ret = c4iw_ofld_send(&rhp->rdev, skb);
-	if (ret)
-		goto out;
 
-	ret = c4iw_wait_for_reply(&rhp->rdev, ep->com.wr_waitp, qhp->ep->hwtid,
-			     qhp->wq.sq.qid, __func__);
-out:
+	ret = c4iw_ref_send_wait(&rhp->rdev, skb, ep->com.wr_waitp,
+				 qhp->ep->hwtid, qhp->wq.sq.qid, __func__);
+
 	pr_debug("ret %d\n", ret);
 	return ret;
 }
@@ -1462,15 +1455,11 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
 	if (qhp->attr.mpa_attr.initiator)
 		build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init);
 
-	ret = c4iw_ofld_send(&rhp->rdev, skb);
-	if (ret)
-		goto err1;
-
-	ret = c4iw_wait_for_reply(&rhp->rdev, qhp->ep->com.wr_waitp,
-				  qhp->ep->hwtid, qhp->wq.sq.qid, __func__);
+	ret = c4iw_ref_send_wait(&rhp->rdev, skb, qhp->ep->com.wr_waitp,
+				 qhp->ep->hwtid, qhp->wq.sq.qid, __func__);
 	if (!ret)
 		goto out;
-err1:
+
 	free_ird(rhp, qhp->attr.max_ird);
 out:
 	pr_debug("ret %d\n", ret);
@@ -1796,7 +1785,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 	if (!qhp)
 		return ERR_PTR(-ENOMEM);
 
-	qhp->wr_waitp = kzalloc(sizeof(*qhp), GFP_KERNEL);
+	qhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
 	if (!qhp->wr_waitp) {
 		ret = -ENOMEM;
 		goto err_free_qhp;
@@ -1963,7 +1952,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 	destroy_qp(&rhp->rdev, &qhp->wq,
 		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
 err_free_wr_wait:
-	kfree(qhp->wr_waitp);
+	c4iw_put_wr_wait(qhp->wr_waitp);
 err_free_qhp:
 	kfree(qhp);
 	return ERR_PTR(ret);