mlx5: Add arbitrary sg list support
Allocate proper context for arbitrary scatterlist registration
If ib_alloc_mr is called with IB_MR_MAP_ARB_SG, the driver
allocate a private klm list instead of a private page list.
Set the UMR wqe correctly when posting the fast registration.
Also, expose device cap IB_DEVICE_MAP_ARB_SG according to the
device id (until we have a FW bit that correctly exposes it).
Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 55fa588..7e89a54 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -491,6 +491,8 @@
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW |
IB_DEVICE_MEM_WINDOW_TYPE_2B;
props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
+ /* We support 'Gappy' memory registration too */
+ props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG;
}
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
if (MLX5_CAP_GEN(mdev, sho)) {
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 3c02b3c..60b8962 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -447,6 +447,7 @@
int ndescs;
int max_descs;
int desc_size;
+ int access_mode;
struct mlx5_core_mkey mmkey;
struct ib_umem *umem;
struct mlx5_shared_mr_info *smr_info;
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 628f4350..4d5bff1 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1521,8 +1521,8 @@
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_create_mkey_mbox_in *in;
struct mlx5_ib_mr *mr;
- int access_mode, err;
- int ndescs = roundup(max_num_sg, 4);
+ int ndescs = ALIGN(max_num_sg, 4);
+ int err;
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
@@ -1540,7 +1540,7 @@
in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
if (mr_type == IB_MR_TYPE_MEM_REG) {
- access_mode = MLX5_ACCESS_MODE_MTT;
+ mr->access_mode = MLX5_ACCESS_MODE_MTT;
in->seg.log2_page_size = PAGE_SHIFT;
err = mlx5_alloc_priv_descs(pd->device, mr,
@@ -1550,6 +1550,15 @@
mr->desc_size = sizeof(u64);
mr->max_descs = ndescs;
+ } else if (mr_type == IB_MR_TYPE_SG_GAPS) {
+ mr->access_mode = MLX5_ACCESS_MODE_KLM;
+
+ err = mlx5_alloc_priv_descs(pd->device, mr,
+ ndescs, sizeof(struct mlx5_klm));
+ if (err)
+ goto err_free_in;
+ mr->desc_size = sizeof(struct mlx5_klm);
+ mr->max_descs = ndescs;
} else if (mr_type == IB_MR_TYPE_SIGNATURE) {
u32 psv_index[2];
@@ -1568,7 +1577,7 @@
if (err)
goto err_free_sig;
- access_mode = MLX5_ACCESS_MODE_KLM;
+ mr->access_mode = MLX5_ACCESS_MODE_KLM;
mr->sig->psv_memory.psv_idx = psv_index[0];
mr->sig->psv_wire.psv_idx = psv_index[1];
@@ -1582,7 +1591,7 @@
goto err_free_in;
}
- in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
+ in->seg.flags = MLX5_PERM_UMR_EN | mr->access_mode;
err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, sizeof(*in),
NULL, NULL, NULL);
if (err)
@@ -1739,6 +1748,32 @@
return ret;
}
+static int
+mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
+ struct scatterlist *sgl,
+ unsigned short sg_nents)
+{
+ struct scatterlist *sg = sgl;
+ struct mlx5_klm *klms = mr->descs;
+ u32 lkey = mr->ibmr.pd->local_dma_lkey;
+ int i;
+
+ mr->ibmr.iova = sg_dma_address(sg);
+ mr->ibmr.length = 0;
+ mr->ndescs = sg_nents;
+
+ for_each_sg(sgl, sg, sg_nents, i) {
+ if (unlikely(i > mr->max_descs))
+ break;
+ klms[i].va = cpu_to_be64(sg_dma_address(sg));
+ klms[i].bcount = cpu_to_be32(sg_dma_len(sg));
+ klms[i].key = cpu_to_be32(lkey);
+ mr->ibmr.length += sg_dma_len(sg);
+ }
+
+ return i;
+}
+
static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
{
struct mlx5_ib_mr *mr = to_mmr(ibmr);
@@ -1766,7 +1801,10 @@
mr->desc_size * mr->max_descs,
DMA_TO_DEVICE);
- n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page);
+ if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
+ n = mlx5_ib_sg_to_klms(mr, sg, sg_nents);
+ else
+ n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page);
ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
mr->desc_size * mr->max_descs,
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 295eb2a..8dee8bc 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -2629,6 +2629,11 @@
int ndescs = mr->ndescs;
memset(umr, 0, sizeof(*umr));
+
+ if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
+ /* KLMs take twice the size of MTTs */
+ ndescs *= 2;
+
umr->flags = MLX5_UMR_CHECK_NOT_FREE;
umr->klm_octowords = get_klm_octo(ndescs);
umr->mkey_mask = frwr_mkey_mask();
@@ -2767,13 +2772,19 @@
int ndescs = ALIGN(mr->ndescs, 8) >> 1;
memset(seg, 0, sizeof(*seg));
- seg->flags = get_umr_flags(access) | MLX5_ACCESS_MODE_MTT;
+
+ if (mr->access_mode == MLX5_ACCESS_MODE_MTT)
+ seg->log2_page_size = ilog2(mr->ibmr.page_size);
+ else if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
+ /* KLMs take twice the size of MTTs */
+ ndescs *= 2;
+
+ seg->flags = get_umr_flags(access) | mr->access_mode;
seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00);
seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
seg->start_addr = cpu_to_be64(mr->ibmr.iova);
seg->len = cpu_to_be64(mr->ibmr.length);
seg->xlt_oct_size = cpu_to_be32(ndescs);
- seg->log2_page_size = ilog2(mr->ibmr.page_size);
}
static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)