mlx5_core: Add support for page faults events and low level handling
* Add a handler function pointer in the mlx5_core_qp struct for page
fault events. Handle page fault events by calling the handler
function, if not NULL.
* Add on-demand paging capability query command.
* Export command for resuming QPs after page faults.
* Add various constants related to paging support.
Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Shachar Raindel <raindel@mellanox.com>
Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 096abe5..70c2823 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -120,6 +120,15 @@
};
enum {
+ MLX5_MKEY_INBOX_PG_ACCESS = 1 << 31
+};
+
+enum {
+ MLX5_PFAULT_SUBTYPE_WQE = 0,
+ MLX5_PFAULT_SUBTYPE_RDMA = 1,
+};
+
+enum {
MLX5_PERM_LOCAL_READ = 1 << 2,
MLX5_PERM_LOCAL_WRITE = 1 << 3,
MLX5_PERM_REMOTE_READ = 1 << 4,
@@ -215,6 +224,8 @@
MLX5_EVENT_TYPE_CMD = 0x0a,
MLX5_EVENT_TYPE_PAGE_REQUEST = 0xb,
+
+ MLX5_EVENT_TYPE_PAGE_FAULT = 0xc,
};
enum {
@@ -300,6 +311,8 @@
enum {
HCA_CAP_OPMOD_GET_MAX = 0,
HCA_CAP_OPMOD_GET_CUR = 1,
+ HCA_CAP_OPMOD_GET_ODP_MAX = 4,
+ HCA_CAP_OPMOD_GET_ODP_CUR = 5
};
struct mlx5_inbox_hdr {
@@ -329,6 +342,23 @@
u8 vsd_psid[16];
};
+enum mlx5_odp_transport_cap_bits {
+ MLX5_ODP_SUPPORT_SEND = 1 << 31,
+ MLX5_ODP_SUPPORT_RECV = 1 << 30,
+ MLX5_ODP_SUPPORT_WRITE = 1 << 29,
+ MLX5_ODP_SUPPORT_READ = 1 << 28,
+};
+
+struct mlx5_odp_caps {
+ char reserved[0x10];
+ struct {
+ __be32 rc_odp_caps;
+ __be32 uc_odp_caps;
+ __be32 ud_odp_caps;
+ } per_transport_caps;
+ char reserved2[0xe4];
+};
+
struct mlx5_cmd_init_hca_mbox_in {
struct mlx5_inbox_hdr hdr;
u8 rsvd0[2];
@@ -449,6 +479,27 @@
__be32 rsvd1[5];
};
+struct mlx5_eqe_page_fault {
+ __be32 bytes_committed;
+ union {
+ struct {
+ u16 reserved1;
+ __be16 wqe_index;
+ u16 reserved2;
+ __be16 packet_length;
+ u8 reserved3[12];
+ } __packed wqe;
+ struct {
+ __be32 r_key;
+ u16 reserved1;
+ __be16 packet_length;
+ __be32 rdma_op_len;
+ __be64 rdma_va;
+ } __packed rdma;
+ } __packed;
+ __be32 flags_qpn;
+} __packed;
+
union ev_data {
__be32 raw[7];
struct mlx5_eqe_cmd cmd;
@@ -460,6 +511,7 @@
struct mlx5_eqe_congestion cong;
struct mlx5_eqe_stall_vl stall_vl;
struct mlx5_eqe_page_req req_pages;
+ struct mlx5_eqe_page_fault page_fault;
} __packed;
struct mlx5_eqe {
@@ -826,7 +878,7 @@
struct mlx5_create_mkey_mbox_in {
struct mlx5_inbox_hdr hdr;
__be32 input_mkey_index;
- u8 rsvd0[4];
+ __be32 flags;
struct mlx5_mkey_seg seg;
u8 rsvd1[16];
__be32 xlat_oct_act_size;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index b1bf415..7088dcd 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -113,6 +113,13 @@
MLX5_REG_HOST_ENDIANNESS = 0x7004,
};
+enum mlx5_page_fault_resume_flags {
+ MLX5_PAGE_FAULT_RESUME_REQUESTOR = 1 << 0,
+ MLX5_PAGE_FAULT_RESUME_WRITE = 1 << 1,
+ MLX5_PAGE_FAULT_RESUME_RDMA = 1 << 2,
+ MLX5_PAGE_FAULT_RESUME_ERROR = 1 << 7,
+};
+
enum dbg_rsc_type {
MLX5_DBG_RSC_QP,
MLX5_DBG_RSC_EQ,
@@ -703,6 +710,9 @@
void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
+#endif
void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector);
@@ -740,6 +750,8 @@
int npsvs, u32 *sig_index);
int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num);
void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common);
+int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
+ struct mlx5_odp_caps *odp_caps);
static inline u32 mlx5_mkey_to_idx(u32 mkey)
{
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 67f4b96..6b1d6f6 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -50,6 +50,9 @@
#define MLX5_BSF_APPTAG_ESCAPE 0x1
#define MLX5_BSF_APPREF_ESCAPE 0x2
+#define MLX5_QPN_BITS 24
+#define MLX5_QPN_MASK ((1 << MLX5_QPN_BITS) - 1)
+
enum mlx5_qp_optpar {
MLX5_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0,
MLX5_QP_OPTPAR_RRE = 1 << 1,
@@ -363,9 +366,46 @@
__be16 num_entries;
};
+enum mlx5_pagefault_flags {
+ MLX5_PFAULT_REQUESTOR = 1 << 0,
+ MLX5_PFAULT_WRITE = 1 << 1,
+ MLX5_PFAULT_RDMA = 1 << 2,
+};
+
+/* Contains the details of a pagefault. */
+struct mlx5_pagefault {
+ u32 bytes_committed;
+ u8 event_subtype;
+ enum mlx5_pagefault_flags flags;
+ union {
+ /* Initiator or send message responder pagefault details. */
+ struct {
+ /* Received packet size, only valid for responders. */
+ u32 packet_size;
+ /*
+ * WQE index. Refers to either the send queue or
+ * receive queue, according to event_subtype.
+ */
+ u16 wqe_index;
+ } wqe;
+ /* RDMA responder pagefault details */
+ struct {
+ u32 r_key;
+ /*
+ * Received packet size, minimal size page fault
+ * resolution required for forward progress.
+ */
+ u32 packet_size;
+ u32 rdma_op_len;
+ u64 rdma_va;
+ } rdma;
+ };
+};
+
struct mlx5_core_qp {
struct mlx5_core_rsc_common common; /* must be first */
void (*event) (struct mlx5_core_qp *, int);
+ void (*pfault_handler)(struct mlx5_core_qp *, struct mlx5_pagefault *);
int qpn;
struct mlx5_rsc_debug *dbg;
int pid;
@@ -533,6 +573,17 @@
return radix_tree_lookup(&dev->priv.mr_table.tree, key);
}
+struct mlx5_page_fault_resume_mbox_in {
+ struct mlx5_inbox_hdr hdr;
+ __be32 flags_qpn;
+ u8 reserved[4];
+};
+
+struct mlx5_page_fault_resume_mbox_out {
+ struct mlx5_outbox_hdr hdr;
+ u8 rsvd[8];
+};
+
int mlx5_core_create_qp(struct mlx5_core_dev *dev,
struct mlx5_core_qp *qp,
struct mlx5_create_qp_mbox_in *in,
@@ -552,6 +603,10 @@
void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev);
int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
+ u8 context, int error);
+#endif
static inline const char *mlx5_qp_type_str(int type)
{