IB/qib: RDMA lkey/rkey validation is inefficient for large MRs
The current code loops during rkey/lkey validation to isolate the MR
for the RDMA, which is expensive when the current operation is inside
a very large memory region.
This fix optimizes rkey/lkey validation routines for user memory
regions and fast memory regions. The MR entry can be isolated by
shifts/mods instead of looping. The existing loop is preserved for
physical memory regions for now.
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
index 5f95f0f..08944e2 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -39,7 +39,6 @@
/* Fast memory region */
struct qib_fmr {
struct ib_fmr ibfmr;
- u8 page_shift;
struct qib_mregion mr; /* must be last */
};
@@ -107,6 +106,7 @@
goto bail;
}
mr->mr.mapsz = m;
+ mr->mr.page_shift = 0;
mr->mr.max_segs = count;
/*
@@ -231,6 +231,8 @@
mr->mr.access_flags = mr_access_flags;
mr->umem = umem;
+ if (is_power_of_2(umem->page_size))
+ mr->mr.page_shift = ilog2(umem->page_size);
m = 0;
n = 0;
list_for_each_entry(chunk, &umem->chunk_list, list) {
@@ -390,7 +392,7 @@
fmr->mr.offset = 0;
fmr->mr.access_flags = mr_access_flags;
fmr->mr.max_segs = fmr_attr->max_pages;
- fmr->page_shift = fmr_attr->page_shift;
+ fmr->mr.page_shift = fmr_attr->page_shift;
atomic_set(&fmr->mr.refcount, 0);
ret = &fmr->ibfmr;
@@ -437,7 +439,7 @@
spin_lock_irqsave(&rkt->lock, flags);
fmr->mr.user_base = iova;
fmr->mr.iova = iova;
- ps = 1 << fmr->page_shift;
+ ps = 1 << fmr->mr.page_shift;
fmr->mr.length = list_len * ps;
m = 0;
n = 0;