IB/rdmavt: Handle dereg of inuse MRs properly

A destroy of an MR prior to destroying the QP can cause the following
diagnostic if the QP is referencing the MR being de-registered:

hfi1 0000:05:00.0: hfi1_0: rvt_dereg_mr timeout mr ffff880856210800 pd ffff880859b20b00

The solution is that when a non-zero refcount is encountered while
the MR is being destroyed, the QPs need to be iterated looking for QPs
in the same PD as the MR.  If rvt_qp_mr_clean() detects that any such
QP references the rkey/lkey, the QP needs to be put into an error state
via a call to rvt_qp_error(), which will trigger the clean up of any
stuck references.

This solution is as specified in IBTA 1.3 Volume 1 11.2.10.5.

[This is reproduced with the 0.4.9 version of qperf and the rc_bw test]

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 1b3801f..4271351 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -441,6 +441,105 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 }
 
 /**
+ * rvt_dereg_clean_qp_cb - callback from iterator
+ * @qp: the qp
+ * @v: the mregion (a struct rvt_mregion pointer passed as u64)
+ *
+ * This routine fields the callback for all QPs and,
+ * for QPs in the same PD as the MR, calls
+ * rvt_qp_mr_clean() to potentially clean up references.
+ */
+static void rvt_dereg_clean_qp_cb(struct rvt_qp *qp, u64 v)
+{
+	struct rvt_mregion *mr = (struct rvt_mregion *)v;
+
+	/* skip QPs whose PD is not the MR's PD */
+	if (mr->pd != qp->ibqp.pd)
+		return;
+	rvt_qp_mr_clean(qp, mr->lkey);
+}
+
+/**
+ * rvt_dereg_clean_qps - find QPs for reference cleanup
+ * @mr: the MR that is being deregistered
+ *
+ * This routine iterates QPs via rvt_qp_iter(), passing the MR
+ * (cast to u64) to rvt_dereg_clean_qp_cb() for each QP.
+ */
+static void rvt_dereg_clean_qps(struct rvt_mregion *mr)
+{
+	struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device);
+
+	rvt_qp_iter(rdi, (u64)mr, rvt_dereg_clean_qp_cb);
+}
+
+/**
+ * rvt_check_refs - check references
+ * @mr: the mregion
+ * @t: the caller identification (printed in the timeout message)
+ *
+ * Checks for references still held on an MR being de-registered.
+ * If the count is non-zero, a clean routine is called (unless lkey
+ * is 0) and the MR's completion is awaited for up to 5 * HZ jiffies.
+ *
+ * Return: 0 on success, -EBUSY (after rvt_get_mr()) on timeout.
+ */
+static int rvt_check_refs(struct rvt_mregion *mr, const char *t)
+{
+	unsigned long timeout;
+	struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device);
+
+	if (percpu_ref_is_zero(&mr->refcount))
+		return 0;
+	/* avoid dma mr (lkey of 0) */
+	if (mr->lkey)
+		rvt_dereg_clean_qps(mr);
+	timeout = wait_for_completion_timeout(&mr->comp, 5 * HZ);
+	if (!timeout) {
+		rvt_pr_err(rdi,
+			   "%s timeout mr %p pd %p lkey %x refcount %ld\n",
+			   t, mr, mr->pd, mr->lkey,
+			   atomic_long_read(&mr->refcount.count));
+		rvt_get_mr(mr);
+		return -EBUSY;
+	}
+	return 0;
+}
+
+/**
+ * rvt_mr_has_lkey - test whether an MR has the given lkey
+ * @mr: the mregion (may be NULL, in which case false is returned)
+ * @lkey: the lkey
+ */
+bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey)
+{
+	return mr && lkey == mr->lkey;
+}
+
+/**
+ * rvt_ss_has_lkey - test whether an sge state references an lkey
+ * @ss: the sge state
+ * @lkey: the lkey
+ *
+ * This code tests for an MR with the given lkey in the
+ * indicated sge state; it returns false when num_sge is 0.
+ */
+bool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey)
+{
+	int i;
+	bool rval = false;
+
+	if (!ss->num_sge)
+		return rval;
+	/* first one: the sge embedded in the state */
+	rval = rvt_mr_has_lkey(ss->sge.mr, lkey);
+	/* any others: the num_sge - 1 entries of sg_list, until a match */
+	for (i = 0; !rval && i < ss->num_sge - 1; i++)
+		rval = rvt_mr_has_lkey(ss->sg_list[i].mr, lkey);
+	return rval;
+}
+
+/**
  * rvt_dereg_mr - unregister and free a memory region
  * @ibmr: the memory region to free
  *
@@ -453,22 +552,14 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 int rvt_dereg_mr(struct ib_mr *ibmr)
 {
 	struct rvt_mr *mr = to_imr(ibmr);
-	struct rvt_dev_info *rdi = ib_to_rvt(ibmr->pd->device);
-	int ret = 0;
-	unsigned long timeout;
+	int ret;
 
 	rvt_free_lkey(&mr->mr);
 
 	rvt_put_mr(&mr->mr); /* will set completion if last */
-	timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ);
-	if (!timeout) {
-		rvt_pr_err(rdi,
-			   "rvt_dereg_mr timeout mr %p pd %p\n",
-			   mr, mr->mr.pd);
-		rvt_get_mr(&mr->mr);
-		ret = -EBUSY;
+	ret = rvt_check_refs(&mr->mr, __func__);
+	if (ret)
 		goto out;
-	}
 	rvt_deinit_mregion(&mr->mr);
 	if (mr->umem)
 		ib_umem_release(mr->umem);
@@ -761,16 +852,12 @@ int rvt_dealloc_fmr(struct ib_fmr *ibfmr)
 {
 	struct rvt_fmr *fmr = to_ifmr(ibfmr);
 	int ret = 0;
-	unsigned long timeout;
 
 	rvt_free_lkey(&fmr->mr);
 	rvt_put_mr(&fmr->mr); /* will set completion if last */
-	timeout = wait_for_completion_timeout(&fmr->mr.comp, 5 * HZ);
-	if (!timeout) {
-		rvt_get_mr(&fmr->mr);
-		ret = -EBUSY;
+	ret = rvt_check_refs(&fmr->mr, __func__);
+	if (ret)
 		goto out;
-	}
 	rvt_deinit_mregion(&fmr->mr);
 	kfree(fmr);
 out: