IB/mlx5: Multithreaded create MR

Use asynchronous commands to execute up to eight concurrent create MR
commands. This is to fill memory caches faster so we keep consuming
from there.  Also, increase timeout for shrinking caches to five
minutes.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 6b8c496..513619a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -557,9 +557,11 @@
 struct mlx5_cmd_work_ent {
 	struct mlx5_cmd_msg    *in;
 	struct mlx5_cmd_msg    *out;
+	void		       *uout;
+	int			uout_size;
 	mlx5_cmd_cbk_t		callback;
 	void		       *context;
-	int idx;
+	int			idx;
 	struct completion	done;
 	struct mlx5_cmd        *cmd;
 	struct work_struct	work;
@@ -570,6 +572,7 @@
 	u8			token;
 	struct timespec		ts1;
 	struct timespec		ts2;
+	u16			op;
 };
 
 struct mlx5_pas {
@@ -653,6 +656,9 @@
 int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr);
 int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 		  int out_size);
+int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
+		     void *out, int out_size, mlx5_cmd_cbk_t callback,
+		     void *context);
 int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
 int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
 int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
@@ -676,7 +682,9 @@
 int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
 		      u16 lwm, int is_srq);
 int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
-			  struct mlx5_create_mkey_mbox_in *in, int inlen);
+			  struct mlx5_create_mkey_mbox_in *in, int inlen,
+			  mlx5_cmd_cbk_t callback, void *context,
+			  struct mlx5_create_mkey_mbox_out *out);
 int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr);
 int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
 			 struct mlx5_query_mkey_mbox_out *out, int outlen);
@@ -745,6 +753,11 @@
 	return mkey_idx << 8;
 }
 
+static inline u8 mlx5_mkey_variant(u32 mkey)
+{
+	return mkey & 0xff;
+}
+
 enum {
 	MLX5_PROF_MASK_QP_SIZE		= (u64)1 << 0,
 	MLX5_PROF_MASK_MR_CACHE		= (u64)1 << 1,