IB/core: Add "type 2" memory windows support

This patch enhances the IB core support for Memory Windows (MWs).

MWs allow an application to have better/flexible control over remote
access to memory.

Two types of MWs are supported, with the second type having two flavors:

    Type 1  - associated with PD only
    Type 2A - associated with QPN only
    Type 2B - associated with PD and QPN

Applications can allocate a MW once, and then repeatedly bind the MW
to different ranges in MRs that are associated to the same PD. Type 1
windows are bound through a verb, while type 2 windows are bound by
posting a work request.

The 32-bit memory key is composed of a 24-bit index and an 8-bit
key. The key is changed with each bind, thus allowing more control
over the peer's use of the memory key.

The changes introduced are the following:

* add memory window type enum and a corresponding parameter to ib_alloc_mw.
* type 2 memory window bind work request support.
* create a struct that contains the common part of the bind verb struct
  ibv_mw_bind and the bind work request into a single struct.
* add the ib_inc_rkey helper function to advance the tag part of an rkey.

Consumer interface details:

* new device capability flags IB_DEVICE_MEM_WINDOW_TYPE_2A and
  IB_DEVICE_MEM_WINDOW_TYPE_2B are added to indicate device support
  for these features.

  Devices can set either IB_DEVICE_MEM_WINDOW_TYPE_2A or
  IB_DEVICE_MEM_WINDOW_TYPE_2B if it supports type 2A or type 2B
  memory windows. It can set neither to indicate it doesn't support
  type 2 windows at all.

* modify existing provides and consumers code to the new param of
  ib_alloc_mw and the ib_mw_bind_info structure

Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Shani Michaeli <shanim@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 46bc045..98cc4b2 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -115,6 +115,8 @@
 	IB_DEVICE_XRC			= (1<<20),
 	IB_DEVICE_MEM_MGT_EXTENSIONS	= (1<<21),
 	IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
+	IB_DEVICE_MEM_WINDOW_TYPE_2A	= (1<<23),
+	IB_DEVICE_MEM_WINDOW_TYPE_2B	= (1<<24)
 };
 
 enum ib_atomic_cap {
@@ -715,6 +717,11 @@
 	IB_MIG_ARMED
 };
 
+enum ib_mw_type {
+	IB_MW_TYPE_1 = 1,
+	IB_MW_TYPE_2 = 2
+};
+
 struct ib_qp_attr {
 	enum ib_qp_state	qp_state;
 	enum ib_qp_state	cur_qp_state;
@@ -758,6 +765,7 @@
 	IB_WR_FAST_REG_MR,
 	IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
 	IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
+	IB_WR_BIND_MW,
 };
 
 enum ib_send_flags {
@@ -780,6 +788,23 @@
 	unsigned int		max_page_list_len;
 };
 
+/**
+ * struct ib_mw_bind_info - Parameters for a memory window bind operation.
+ * @mr: A memory region to bind the memory window to.
+ * @addr: The address where the memory window should begin.
+ * @length: The length of the memory window, in bytes.
+ * @mw_access_flags: Access flags from enum ib_access_flags for the window.
+ *
+ * This struct contains the shared parameters for type 1 and type 2
+ * memory window bind operations.
+ */
+struct ib_mw_bind_info {
+	struct ib_mr   *mr;
+	u64		addr;
+	u64		length;
+	int		mw_access_flags;
+};
+
 struct ib_send_wr {
 	struct ib_send_wr      *next;
 	u64			wr_id;
@@ -823,6 +848,12 @@
 			int				access_flags;
 			u32				rkey;
 		} fast_reg;
+		struct {
+			struct ib_mw            *mw;
+			/* The new rkey for the memory window. */
+			u32                      rkey;
+			struct ib_mw_bind_info   bind_info;
+		} bind_mw;
 	} wr;
 	u32			xrc_remote_srq_num;	/* XRC TGT QPs only */
 };
@@ -839,7 +870,8 @@
 	IB_ACCESS_REMOTE_WRITE	= (1<<1),
 	IB_ACCESS_REMOTE_READ	= (1<<2),
 	IB_ACCESS_REMOTE_ATOMIC	= (1<<3),
-	IB_ACCESS_MW_BIND	= (1<<4)
+	IB_ACCESS_MW_BIND	= (1<<4),
+	IB_ZERO_BASED		= (1<<5)
 };
 
 struct ib_phys_buf {
@@ -862,13 +894,16 @@
 	IB_MR_REREG_ACCESS	= (1<<2)
 };
 
+/**
+ * struct ib_mw_bind - Parameters for a type 1 memory window bind operation.
+ * @wr_id:      Work request id.
+ * @send_flags: Flags from ib_send_flags enum.
+ * @bind_info:  More parameters of the bind operation.
+ */
 struct ib_mw_bind {
-	struct ib_mr   *mr;
-	u64		wr_id;
-	u64		addr;
-	u32		length;
-	int		send_flags;
-	int		mw_access_flags;
+	u64                    wr_id;
+	int                    send_flags;
+	struct ib_mw_bind_info bind_info;
 };
 
 struct ib_fmr_attr {
@@ -991,6 +1026,7 @@
 	struct ib_pd		*pd;
 	struct ib_uobject	*uobject;
 	u32			rkey;
+	enum ib_mw_type         type;
 };
 
 struct ib_fmr {
@@ -1202,7 +1238,8 @@
 						    int num_phys_buf,
 						    int mr_access_flags,
 						    u64 *iova_start);
-	struct ib_mw *             (*alloc_mw)(struct ib_pd *pd);
+	struct ib_mw *             (*alloc_mw)(struct ib_pd *pd,
+					       enum ib_mw_type type);
 	int                        (*bind_mw)(struct ib_qp *qp,
 					      struct ib_mw *mw,
 					      struct ib_mw_bind *mw_bind);
@@ -2019,6 +2056,8 @@
  * ib_dereg_mr - Deregisters a memory region and removes it from the
  *   HCA translation table.
  * @mr: The memory region to deregister.
+ *
+ * This function can fail, if the memory region has memory windows bound to it.
  */
 int ib_dereg_mr(struct ib_mr *mr);
 
@@ -2071,10 +2110,22 @@
 }
 
 /**
+ * ib_inc_rkey - increments the key portion of the given rkey. Can be used
+ * for calculating a new rkey for type 2 memory windows.
+ * @rkey - the rkey to increment.
+ */
+static inline u32 ib_inc_rkey(u32 rkey)
+{
+	const u32 mask = 0x000000ff;
+	return ((rkey + 1) & mask) | (rkey & ~mask);
+}
+
+/**
  * ib_alloc_mw - Allocates a memory window.
  * @pd: The protection domain associated with the memory window.
+ * @type: The type of the memory window (1 or 2).
  */
-struct ib_mw *ib_alloc_mw(struct ib_pd *pd);
+struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
 
 /**
  * ib_bind_mw - Posts a work request to the send queue of the specified
@@ -2084,6 +2135,10 @@
  * @mw: The memory window to bind.
  * @mw_bind: Specifies information about the memory window, including
  *   its address range, remote access rights, and associated memory region.
+ *
+ * If there is no immediate error, the function will update the rkey member
+ * of the mw parameter to its new value. The bind operation can still fail
+ * asynchronously.
  */
 static inline int ib_bind_mw(struct ib_qp *qp,
 			     struct ib_mw *mw,