Merge branches 'misc', 'qedr', 'reject-helpers', 'rxe' and 'srp' into merge-test
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 4fa524d..11dacd9 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -156,7 +156,6 @@
 	/* Create new device info */
 	port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
 	if (!port_priv) {
-		dev_err(&device->dev, "No memory for ib_agent_port_private\n");
 		ret = -ENOMEM;
 		goto error1;
 	}
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 1a2984c..ae04826 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -770,12 +770,8 @@
 	int err = 0;
 
 	table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL);
-
-	if (!table) {
-		pr_warn("failed to allocate ib gid cache for %s\n",
-			ib_dev->name);
+	if (!table)
 		return -ENOMEM;
-	}
 
 	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
 		u8 rdma_port = port + rdma_start_port(ib_dev);
@@ -1170,14 +1166,13 @@
 					  GFP_KERNEL);
 	if (!device->cache.pkey_cache ||
 	    !device->cache.lmc_cache) {
-		pr_warn("Couldn't allocate cache for %s\n", device->name);
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto free;
 	}
 
 	err = gid_table_setup_one(device);
 	if (err)
-		/* Allocated memory will be cleaned in the release function */
-		return err;
+		goto free;
 
 	for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
 		ib_cache_update(device, p + rdma_start_port(device));
@@ -1192,6 +1187,9 @@
 
 err:
 	gid_table_cleanup_one(device);
+free:
+	kfree(device->cache.pkey_cache);
+	kfree(device->cache.lmc_cache);
 	return err;
 }
 
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index c97e4d5..0d4b114 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -57,6 +57,54 @@
 MODULE_DESCRIPTION("InfiniBand CM");
 MODULE_LICENSE("Dual BSD/GPL");
 
+/* Reject reason strings indexed by IB_CM_REJ_* code; gaps stay NULL. */
+static const char * const ibcm_rej_reason_strs[] = {
+	[IB_CM_REJ_NO_QP]			= "no QP",
+	[IB_CM_REJ_NO_EEC]			= "no EEC",
+	[IB_CM_REJ_NO_RESOURCES]		= "no resources",
+	[IB_CM_REJ_TIMEOUT]			= "timeout",
+	[IB_CM_REJ_UNSUPPORTED]			= "unsupported",
+	[IB_CM_REJ_INVALID_COMM_ID]		= "invalid comm ID",
+	[IB_CM_REJ_INVALID_COMM_INSTANCE]	= "invalid comm instance",
+	[IB_CM_REJ_INVALID_SERVICE_ID]		= "invalid service ID",
+	[IB_CM_REJ_INVALID_TRANSPORT_TYPE]	= "invalid transport type",
+	[IB_CM_REJ_STALE_CONN]			= "stale conn",
+	[IB_CM_REJ_RDC_NOT_EXIST]		= "RDC not exist",
+	[IB_CM_REJ_INVALID_GID]			= "invalid GID",
+	[IB_CM_REJ_INVALID_LID]			= "invalid LID",
+	[IB_CM_REJ_INVALID_SL]			= "invalid SL",
+	[IB_CM_REJ_INVALID_TRAFFIC_CLASS]	= "invalid traffic class",
+	[IB_CM_REJ_INVALID_HOP_LIMIT]		= "invalid hop limit",
+	[IB_CM_REJ_INVALID_PACKET_RATE]		= "invalid packet rate",
+	[IB_CM_REJ_INVALID_ALT_GID]		= "invalid alt GID",
+	[IB_CM_REJ_INVALID_ALT_LID]		= "invalid alt LID",
+	[IB_CM_REJ_INVALID_ALT_SL]		= "invalid alt SL",
+	[IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS]	= "invalid alt traffic class",
+	[IB_CM_REJ_INVALID_ALT_HOP_LIMIT]	= "invalid alt hop limit",
+	[IB_CM_REJ_INVALID_ALT_PACKET_RATE]	= "invalid alt packet rate",
+	[IB_CM_REJ_PORT_CM_REDIRECT]		= "port CM redirect",
+	[IB_CM_REJ_PORT_REDIRECT]		= "port redirect",
+	[IB_CM_REJ_INVALID_MTU]			= "invalid MTU",
+	[IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES]	= "insufficient resp resources",
+	[IB_CM_REJ_CONSUMER_DEFINED]		= "consumer defined",
+	[IB_CM_REJ_INVALID_RNR_RETRY]		= "invalid RNR retry",
+	[IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID]	= "duplicate local comm ID",
+	[IB_CM_REJ_INVALID_CLASS_VERSION]	= "invalid class version",
+	[IB_CM_REJ_INVALID_FLOW_LABEL]		= "invalid flow label",
+	[IB_CM_REJ_INVALID_ALT_FLOW_LABEL]	= "invalid alt flow label",
+};
+
+/* Map an IB CM reject reason to its description, or a generic fallback. */
+const char *__attribute_const__ ibcm_reject_msg(int reason)
+{
+	size_t idx = reason;
+	bool known = idx < ARRAY_SIZE(ibcm_rej_reason_strs) &&
+		     ibcm_rej_reason_strs[idx];
+
+	return known ? ibcm_rej_reason_strs[idx] : "unrecognized reason";
+}
+EXPORT_SYMBOL(ibcm_reject_msg);
+
 static void cm_add_one(struct ib_device *device);
 static void cm_remove_one(struct ib_device *device, void *client_data);
 
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 36bf50e..8c30e3d 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -101,6 +101,49 @@
 }
 EXPORT_SYMBOL(rdma_event_msg);
 
+/* Return a printable description of @reason for the transport of @id. */
+const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
+						int reason)
+{
+	if (rdma_ib_or_roce(id->device, id->port_num))
+		return ibcm_reject_msg(reason);
+	if (rdma_protocol_iwarp(id->device, id->port_num))
+		return iwcm_reject_msg(reason);
+
+	WARN_ON_ONCE(1);
+	return "unrecognized transport";
+}
+EXPORT_SYMBOL(rdma_reject_msg);
+
+/* Tell whether @reason is the consumer-issued reject code of @id's transport. */
+bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
+{
+	if (rdma_ib_or_roce(id->device, id->port_num))
+		return reason == IB_CM_REJ_CONSUMER_DEFINED;
+	if (rdma_protocol_iwarp(id->device, id->port_num))
+		return reason == -ECONNREFUSED;
+
+	WARN_ON_ONCE(1);
+	return false;
+}
+EXPORT_SYMBOL(rdma_is_consumer_reject);
+
+/*
+ * Return the private data of a consumer reject and store its length in
+ * @data_len; any other event status yields NULL and a length of zero.
+ */
+const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
+				      struct rdma_cm_event *ev, u8 *data_len)
+{
+	if (!rdma_is_consumer_reject(id, ev->status)) {
+		*data_len = 0;
+		return NULL;
+	}
+	*data_len = ev->param.conn.private_data_len;
+	return ev->param.conn.private_data;
+}
+EXPORT_SYMBOL(rdma_consumer_reject_data);
+
 static void cma_add_one(struct ib_device *device);
 static void cma_remove_one(struct ib_device *device, void *client_data);
 
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 19d499d..1acc95b 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -72,9 +72,6 @@
 void ib_cache_setup(void);
 void ib_cache_cleanup(void);
 
-int ib_resolve_eth_dmac(struct ib_qp *qp,
-			struct ib_qp_attr *qp_attr, int *qp_attr_mask);
-
 typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
 	      struct net_device *idev, void *cookie);
 
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 760ef60..571974c 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -254,11 +254,8 @@
 	unsigned long flags;
 
 	context = kmalloc(sizeof *context, GFP_KERNEL);
-	if (!context) {
-		pr_warn("Couldn't allocate client context for %s/%s\n",
-			device->name, client->name);
+	if (!context)
 		return -ENOMEM;
-	}
 
 	context->client = client;
 	context->data   = NULL;
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index cdbb1f1..cdfad5f 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -247,7 +247,6 @@
 			kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket,
 				GFP_KERNEL);
 		if (!pool->cache_bucket) {
-			pr_warn(PFX "Failed to allocate cache in pool\n");
 			ret = -ENOMEM;
 			goto out_free_pool;
 		}
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 5495e22..31661b5 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -59,6 +59,27 @@
 MODULE_DESCRIPTION("iWARP CM");
 MODULE_LICENSE("Dual BSD/GPL");
 
+/* Reject reason strings indexed by positive errno value; gaps stay NULL. */
+static const char * const iwcm_rej_reason_strs[] = {
+	[ECONNRESET]			= "reset by remote host",
+	[ECONNREFUSED]			= "refused by remote application",
+	[ETIMEDOUT]			= "setup timeout",
+};
+
+/*
+ * Map an iWARP CM reject reason to its description.  iWARP reports
+ * negative errnos, so the reason is negated before the table lookup.
+ */
+const char *__attribute_const__ iwcm_reject_msg(int reason)
+{
+	size_t idx = -reason;
+	bool known = idx < ARRAY_SIZE(iwcm_rej_reason_strs) &&
+		     iwcm_rej_reason_strs[idx];
+
+	return known ? iwcm_rej_reason_strs[idx] : "unrecognized reason";
+}
+EXPORT_SYMBOL(iwcm_reject_msg);
+
 static struct ibnl_client_cbs iwcm_nl_cb_table[] = {
 	[RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
 	[RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
index 1c41b95..a0e7c16 100644
--- a/drivers/infiniband/core/iwpm_msg.c
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -604,7 +604,6 @@
 	}
 	rem_info = kzalloc(sizeof(struct iwpm_remote_info), GFP_ATOMIC);
 	if (!rem_info) {
-		pr_err("%s: Unable to allocate a remote info\n", __func__);
 		ret = -ENOMEM;
 		return ret;
 	}
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index ade71e7..3ef51a9 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -62,7 +62,6 @@
 					sizeof(struct hlist_head), GFP_KERNEL);
 		if (!iwpm_hash_bucket) {
 			ret = -ENOMEM;
-			pr_err("%s Unable to create mapinfo hash table\n", __func__);
 			goto init_exit;
 		}
 		iwpm_reminfo_bucket = kzalloc(IWPM_REMINFO_HASH_SIZE *
@@ -70,7 +69,6 @@
 		if (!iwpm_reminfo_bucket) {
 			kfree(iwpm_hash_bucket);
 			ret = -ENOMEM;
-			pr_err("%s Unable to create reminfo hash table\n", __func__);
 			goto init_exit;
 		}
 	}
@@ -128,10 +126,9 @@
 	if (!iwpm_valid_client(nl_client))
 		return ret;
 	map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL);
-	if (!map_info) {
-		pr_err("%s: Unable to allocate a mapping info\n", __func__);
+	if (!map_info)
 		return -ENOMEM;
-	}
+
 	memcpy(&map_info->local_sockaddr, local_sockaddr,
 	       sizeof(struct sockaddr_storage));
 	memcpy(&map_info->mapped_sockaddr, mapped_sockaddr,
@@ -309,10 +306,9 @@
 	unsigned long flags;
 
 	nlmsg_request = kzalloc(sizeof(struct iwpm_nlmsg_request), gfp);
-	if (!nlmsg_request) {
-		pr_err("%s Unable to allocate a nlmsg_request\n", __func__);
+	if (!nlmsg_request)
 		return NULL;
-	}
+
 	spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags);
 	list_add_tail(&nlmsg_request->inprocess_list, &iwpm_nlmsg_req_list);
 	spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index d120f6f..a009f71 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -816,7 +816,6 @@
 	local = kmalloc(sizeof *local, GFP_ATOMIC);
 	if (!local) {
 		ret = -ENOMEM;
-		dev_err(&device->dev, "No memory for ib_mad_local_private\n");
 		goto out;
 	}
 	local->mad_priv = NULL;
@@ -824,7 +823,6 @@
 	mad_priv = alloc_mad_private(mad_size, GFP_ATOMIC);
 	if (!mad_priv) {
 		ret = -ENOMEM;
-		dev_err(&device->dev, "No memory for local response MAD\n");
 		kfree(local);
 		goto out;
 	}
@@ -947,9 +945,6 @@
 	for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
 		seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
 		if (!seg) {
-			dev_err(&send_buf->mad_agent->device->dev,
-				"alloc_send_rmpp_segs: RMPP mem alloc failed for len %zd, gfp %#x\n",
-				sizeof (*seg) + seg_size, gfp_mask);
 			free_send_rmpp_list(send_wr);
 			return -ENOMEM;
 		}
@@ -1362,12 +1357,7 @@
 {
 	/* Allocate management method table */
 	*method = kzalloc(sizeof **method, GFP_ATOMIC);
-	if (!*method) {
-		pr_err("No memory for ib_mad_mgmt_method_table\n");
-		return -ENOMEM;
-	}
-
-	return 0;
+	return (*method) ? 0 : (-ENOMEM);
 }
 
 /*
@@ -1458,8 +1448,6 @@
 		/* Allocate management class table for "new" class version */
 		*class = kzalloc(sizeof **class, GFP_ATOMIC);
 		if (!*class) {
-			dev_err(&agent_priv->agent.device->dev,
-				"No memory for ib_mad_mgmt_class_table\n");
 			ret = -ENOMEM;
 			goto error1;
 		}
@@ -1524,22 +1512,16 @@
 	if (!*vendor_table) {
 		/* Allocate mgmt vendor class table for "new" class version */
 		vendor = kzalloc(sizeof *vendor, GFP_ATOMIC);
-		if (!vendor) {
-			dev_err(&agent_priv->agent.device->dev,
-				"No memory for ib_mad_mgmt_vendor_class_table\n");
+		if (!vendor)
 			goto error1;
-		}
 
 		*vendor_table = vendor;
 	}
 	if (!(*vendor_table)->vendor_class[vclass]) {
 		/* Allocate table for this management vendor class */
 		vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC);
-		if (!vendor_class) {
-			dev_err(&agent_priv->agent.device->dev,
-				"No memory for ib_mad_mgmt_vendor_class\n");
+		if (!vendor_class)
 			goto error2;
-		}
 
 		(*vendor_table)->vendor_class[vclass] = vendor_class;
 	}
@@ -2238,11 +2220,8 @@
 
 	mad_size = recv->mad_size;
 	response = alloc_mad_private(mad_size, GFP_KERNEL);
-	if (!response) {
-		dev_err(&port_priv->device->dev,
-			"%s: no memory for response buffer\n", __func__);
+	if (!response)
 		goto out;
-	}
 
 	if (rdma_cap_ib_switch(port_priv->device))
 		port_num = wc->port_num;
@@ -2869,8 +2848,6 @@
 			mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv),
 						     GFP_ATOMIC);
 			if (!mad_priv) {
-				dev_err(&qp_info->port_priv->device->dev,
-					"No memory for receive buffer\n");
 				ret = -ENOMEM;
 				break;
 			}
@@ -2961,11 +2938,8 @@
 	u16 pkey_index;
 
 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
-	if (!attr) {
-		dev_err(&port_priv->device->dev,
-			"Couldn't kmalloc ib_qp_attr\n");
+	if (!attr)
 		return -ENOMEM;
-	}
 
 	ret = ib_find_pkey(port_priv->device, port_priv->port_num,
 			   IB_DEFAULT_PKEY_FULL, &pkey_index);
@@ -3135,10 +3109,8 @@
 
 	/* Create new device info */
 	port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
-	if (!port_priv) {
-		dev_err(&device->dev, "No memory for ib_mad_port_private\n");
+	if (!port_priv)
 		return -ENOMEM;
-	}
 
 	port_priv->device = device;
 	port_priv->port_num = port_num;
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 06556c3..c86ddce 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -304,10 +304,9 @@
 	for_ifa(in_dev) {
 		struct sin_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
 
-		if (!entry) {
-			pr_warn("roce_gid_mgmt: couldn't allocate entry for IPv4 update\n");
+		if (!entry)
 			continue;
-		}
+
 		entry->ip.sin_family = AF_INET;
 		entry->ip.sin_addr.s_addr = ifa->ifa_address;
 		list_add_tail(&entry->list, &sin_list);
@@ -348,10 +347,8 @@
 	list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
 		struct sin6_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
 
-		if (!entry) {
-			pr_warn("roce_gid_mgmt: couldn't allocate entry for IPv6 update\n");
+		if (!entry)
 			continue;
-		}
 
 		entry->sin6.sin6_family = AF_INET6;
 		entry->sin6.sin6_addr = ifp->addr;
@@ -459,10 +456,8 @@
 		struct upper_list *entry = kmalloc(sizeof(*entry),
 						   GFP_ATOMIC);
 
-		if (!entry) {
-			pr_info("roce_gid_mgmt: couldn't allocate entry to delete ndev\n");
+		if (!entry)
 			continue;
-		}
 
 		list_add_tail(&entry->list, &upper_list);
 		dev_hold(upper);
@@ -555,10 +550,8 @@
 	struct netdev_event_work *ndev_work =
 		kmalloc(sizeof(*ndev_work), GFP_KERNEL);
 
-	if (!ndev_work) {
-		pr_warn("roce_gid_mgmt: can't allocate work for netdevice_event\n");
+	if (!ndev_work)
 		return NOTIFY_DONE;
-	}
 
 	memcpy(ndev_work->cmds, cmds, sizeof(ndev_work->cmds));
 	for (i = 0; i < ARRAY_SIZE(ndev_work->cmds) && ndev_work->cmds[i].cb; i++) {
@@ -692,10 +685,8 @@
 	}
 
 	work = kmalloc(sizeof(*work), GFP_ATOMIC);
-	if (!work) {
-		pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
+	if (!work)
 		return NOTIFY_DONE;
-	}
 
 	INIT_WORK(&work->work, update_gid_event_work_handler);
 
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 7713ef0..579f9a7 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -1104,8 +1104,11 @@
 	struct ib_ucm_cmd_hdr hdr;
 	ssize_t result;
 
-	if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+	if (!ib_safe_file_access(filp)) {
+		pr_err_once("ucm_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
+			    task_tgid_vnr(current), current->comm);
 		return -EACCES;
+	}
 
 	if (len < sizeof(hdr))
 		return -EINVAL;
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 9520154..e12f8fa 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -1584,8 +1584,11 @@
 	struct rdma_ucm_cmd_hdr hdr;
 	ssize_t ret;
 
-	if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+	if (!ib_safe_file_access(filp)) {
+		pr_err_once("ucma_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
+			    task_tgid_vnr(current), current->comm);
 		return -EACCES;
+	}
 
 	if (len < sizeof(hdr))
 		return -EINVAL;
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index df26a74..455034a 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -289,5 +289,6 @@
 IB_UVERBS_DECLARE_EX_CMD(destroy_wq);
 IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table);
 IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table);
+IB_UVERBS_DECLARE_EX_CMD(modify_qp);
 
 #endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index cb3f515a..09b6491 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -2328,94 +2328,88 @@
 	}
 }
 
-ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
-			    struct ib_device *ib_dev,
-			    const char __user *buf, int in_len,
-			    int out_len)
+static int modify_qp(struct ib_uverbs_file *file,
+		     struct ib_uverbs_ex_modify_qp *cmd, struct ib_udata *udata)
 {
-	struct ib_uverbs_modify_qp cmd;
-	struct ib_udata            udata;
-	struct ib_qp              *qp;
-	struct ib_qp_attr         *attr;
-	int                        ret;
-
-	if (copy_from_user(&cmd, buf, sizeof cmd))
-		return -EFAULT;
-
-	INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
-		   out_len);
+	struct ib_qp_attr *attr;
+	struct ib_qp *qp;
+	int ret;
 
 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
 	if (!attr)
 		return -ENOMEM;
 
-	qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+	qp = idr_read_qp(cmd->base.qp_handle, file->ucontext);
 	if (!qp) {
 		ret = -EINVAL;
 		goto out;
 	}
 
-	attr->qp_state 		  = cmd.qp_state;
-	attr->cur_qp_state 	  = cmd.cur_qp_state;
-	attr->path_mtu 		  = cmd.path_mtu;
-	attr->path_mig_state 	  = cmd.path_mig_state;
-	attr->qkey 		  = cmd.qkey;
-	attr->rq_psn 		  = cmd.rq_psn;
-	attr->sq_psn 		  = cmd.sq_psn;
-	attr->dest_qp_num 	  = cmd.dest_qp_num;
-	attr->qp_access_flags 	  = cmd.qp_access_flags;
-	attr->pkey_index 	  = cmd.pkey_index;
-	attr->alt_pkey_index 	  = cmd.alt_pkey_index;
-	attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
-	attr->max_rd_atomic 	  = cmd.max_rd_atomic;
-	attr->max_dest_rd_atomic  = cmd.max_dest_rd_atomic;
-	attr->min_rnr_timer 	  = cmd.min_rnr_timer;
-	attr->port_num 		  = cmd.port_num;
-	attr->timeout 		  = cmd.timeout;
-	attr->retry_cnt 	  = cmd.retry_cnt;
-	attr->rnr_retry 	  = cmd.rnr_retry;
-	attr->alt_port_num 	  = cmd.alt_port_num;
-	attr->alt_timeout 	  = cmd.alt_timeout;
+	attr->qp_state		  = cmd->base.qp_state;
+	attr->cur_qp_state	  = cmd->base.cur_qp_state;
+	attr->path_mtu		  = cmd->base.path_mtu;
+	attr->path_mig_state	  = cmd->base.path_mig_state;
+	attr->qkey		  = cmd->base.qkey;
+	attr->rq_psn		  = cmd->base.rq_psn;
+	attr->sq_psn		  = cmd->base.sq_psn;
+	attr->dest_qp_num	  = cmd->base.dest_qp_num;
+	attr->qp_access_flags	  = cmd->base.qp_access_flags;
+	attr->pkey_index	  = cmd->base.pkey_index;
+	attr->alt_pkey_index	  = cmd->base.alt_pkey_index;
+	attr->en_sqd_async_notify = cmd->base.en_sqd_async_notify;
+	attr->max_rd_atomic	  = cmd->base.max_rd_atomic;
+	attr->max_dest_rd_atomic  = cmd->base.max_dest_rd_atomic;
+	attr->min_rnr_timer	  = cmd->base.min_rnr_timer;
+	attr->port_num		  = cmd->base.port_num;
+	attr->timeout		  = cmd->base.timeout;
+	attr->retry_cnt		  = cmd->base.retry_cnt;
+	attr->rnr_retry		  = cmd->base.rnr_retry;
+	attr->alt_port_num	  = cmd->base.alt_port_num;
+	attr->alt_timeout	  = cmd->base.alt_timeout;
+	attr->rate_limit	  = cmd->rate_limit;
 
-	memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16);
-	attr->ah_attr.grh.flow_label        = cmd.dest.flow_label;
-	attr->ah_attr.grh.sgid_index        = cmd.dest.sgid_index;
-	attr->ah_attr.grh.hop_limit         = cmd.dest.hop_limit;
-	attr->ah_attr.grh.traffic_class     = cmd.dest.traffic_class;
-	attr->ah_attr.dlid 	    	    = cmd.dest.dlid;
-	attr->ah_attr.sl   	    	    = cmd.dest.sl;
-	attr->ah_attr.src_path_bits 	    = cmd.dest.src_path_bits;
-	attr->ah_attr.static_rate   	    = cmd.dest.static_rate;
-	attr->ah_attr.ah_flags 	    	    = cmd.dest.is_global ? IB_AH_GRH : 0;
-	attr->ah_attr.port_num 	    	    = cmd.dest.port_num;
+	memcpy(attr->ah_attr.grh.dgid.raw, cmd->base.dest.dgid, 16);
+	attr->ah_attr.grh.flow_label	= cmd->base.dest.flow_label;
+	attr->ah_attr.grh.sgid_index	= cmd->base.dest.sgid_index;
+	attr->ah_attr.grh.hop_limit	= cmd->base.dest.hop_limit;
+	attr->ah_attr.grh.traffic_class	= cmd->base.dest.traffic_class;
+	attr->ah_attr.dlid		= cmd->base.dest.dlid;
+	attr->ah_attr.sl		= cmd->base.dest.sl;
+	attr->ah_attr.src_path_bits	= cmd->base.dest.src_path_bits;
+	attr->ah_attr.static_rate	= cmd->base.dest.static_rate;
+	attr->ah_attr.ah_flags		= cmd->base.dest.is_global ?
+					  IB_AH_GRH : 0;
+	attr->ah_attr.port_num		= cmd->base.dest.port_num;
 
-	memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16);
-	attr->alt_ah_attr.grh.flow_label    = cmd.alt_dest.flow_label;
-	attr->alt_ah_attr.grh.sgid_index    = cmd.alt_dest.sgid_index;
-	attr->alt_ah_attr.grh.hop_limit     = cmd.alt_dest.hop_limit;
-	attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class;
-	attr->alt_ah_attr.dlid 	    	    = cmd.alt_dest.dlid;
-	attr->alt_ah_attr.sl   	    	    = cmd.alt_dest.sl;
-	attr->alt_ah_attr.src_path_bits     = cmd.alt_dest.src_path_bits;
-	attr->alt_ah_attr.static_rate       = cmd.alt_dest.static_rate;
-	attr->alt_ah_attr.ah_flags 	    = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
-	attr->alt_ah_attr.port_num 	    = cmd.alt_dest.port_num;
+	memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd->base.alt_dest.dgid, 16);
+	attr->alt_ah_attr.grh.flow_label    = cmd->base.alt_dest.flow_label;
+	attr->alt_ah_attr.grh.sgid_index    = cmd->base.alt_dest.sgid_index;
+	attr->alt_ah_attr.grh.hop_limit     = cmd->base.alt_dest.hop_limit;
+	attr->alt_ah_attr.grh.traffic_class = cmd->base.alt_dest.traffic_class;
+	attr->alt_ah_attr.dlid		    = cmd->base.alt_dest.dlid;
+	attr->alt_ah_attr.sl		    = cmd->base.alt_dest.sl;
+	attr->alt_ah_attr.src_path_bits	    = cmd->base.alt_dest.src_path_bits;
+	attr->alt_ah_attr.static_rate	    = cmd->base.alt_dest.static_rate;
+	attr->alt_ah_attr.ah_flags	    = cmd->base.alt_dest.is_global ?
+					      IB_AH_GRH : 0;
+	attr->alt_ah_attr.port_num	    = cmd->base.alt_dest.port_num;
 
 	if (qp->real_qp == qp) {
-		ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask);
-		if (ret)
-			goto release_qp;
+		if (cmd->base.attr_mask & IB_QP_AV) {
+			ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
+			if (ret)
+				goto release_qp;
+		}
 		ret = qp->device->modify_qp(qp, attr,
-			modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata);
+					    modify_qp_mask(qp->qp_type,
+							   cmd->base.attr_mask),
+					    udata);
 	} else {
-		ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask));
+		ret = ib_modify_qp(qp, attr,
+				   modify_qp_mask(qp->qp_type,
+						  cmd->base.attr_mask));
 	}
 
-	if (ret)
-		goto release_qp;
-
-	ret = in_len;
-
 release_qp:
 	put_qp_read(qp);
 
@@ -2425,6 +2419,68 @@
 	return ret;
 }
 
+/*
+ * Non-extended modify QP command: copy the base command from @buf, refuse
+ * attr_mask bits above the legacy range, return @in_len on success.
+ */
+ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
+			    struct ib_device *ib_dev,
+			    const char __user *buf, int in_len,
+			    int out_len)
+{
+	struct ib_uverbs_ex_modify_qp cmd = {};
+	struct ib_udata udata;
+	int ret;
+
+	if (copy_from_user(&cmd.base, buf, sizeof(cmd.base)))
+		return -EFAULT;
+	if (cmd.base.attr_mask &
+	    ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1))
+		return -EOPNOTSUPP;
+
+	INIT_UDATA(&udata, buf + sizeof(cmd.base), NULL,
+		   in_len - sizeof(cmd.base), out_len);
+	ret = modify_qp(file, &cmd, &udata);
+
+	return ret ? ret : in_len;
+}
+
+/* Extended modify QP command: validate the user request, then apply it. */
+int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
+			   struct ib_device *ib_dev,
+			   struct ib_udata *ucore,
+			   struct ib_udata *uhw)
+{
+	struct ib_uverbs_ex_modify_qp cmd = {};
+	int ret;
+
+	/*
+	 * Last bit is reserved for extending the attr_mask by
+	 * using another field.
+	 */
+	BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31));
+
+	/* Only the base part is mandatory; absent fields stay zeroed. */
+	if (ucore->inlen < sizeof(cmd.base))
+		return -EINVAL;
+
+	ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+	if (ret)
+		return ret;
+
+	if (cmd.base.attr_mask &
+	    ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1))
+		return -EOPNOTSUPP;
+
+	/* Bytes beyond the known command must all be zero. */
+	if (ucore->inlen > sizeof(cmd) &&
+	    !ib_is_udata_cleared(ucore, sizeof(cmd),
+				 ucore->inlen - sizeof(cmd)))
+		return -EOPNOTSUPP;
+
+	return modify_qp(file, &cmd, uhw);
+}
+
 ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
 			     struct ib_device *ib_dev,
 			     const char __user *buf, int in_len,
@@ -2875,6 +2931,7 @@
 	struct ib_ah			*ah;
 	struct ib_ah_attr		attr;
 	int ret;
+	struct ib_udata                   udata;
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -2882,6 +2939,10 @@
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
+	INIT_UDATA(&udata, buf + sizeof(cmd),
+		   (unsigned long)cmd.response + sizeof(resp),
+		   in_len - sizeof(cmd), out_len - sizeof(resp));
+
 	uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
 	if (!uobj)
 		return -ENOMEM;
@@ -2908,12 +2969,16 @@
 	memset(&attr.dmac, 0, sizeof(attr.dmac));
 	memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
 
-	ah = ib_create_ah(pd, &attr);
+	ah = pd->device->create_ah(pd, &attr, &udata);
+
 	if (IS_ERR(ah)) {
 		ret = PTR_ERR(ah);
 		goto err_put;
 	}
 
+	ah->device  = pd->device;
+	ah->pd      = pd;
+	atomic_inc(&pd->usecnt);
 	ah->uobject  = uobj;
 	uobj->object = ah;
 
@@ -3124,8 +3189,10 @@
 	kern_spec_val = (void *)kern_spec +
 		sizeof(struct ib_uverbs_flow_spec_hdr);
 	kern_spec_mask = kern_spec_val + kern_filter_sz;
+	if (ib_spec->type == (IB_FLOW_SPEC_INNER | IB_FLOW_SPEC_VXLAN_TUNNEL))
+		return -EINVAL;
 
-	switch (ib_spec->type) {
+	switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
 	case IB_FLOW_SPEC_ETH:
 		ib_filter_sz = offsetof(struct ib_flow_eth_filter, real_sz);
 		actual_filter_sz = spec_filter_size(kern_spec_mask,
@@ -3175,6 +3242,21 @@
 		memcpy(&ib_spec->tcp_udp.val, kern_spec_val, actual_filter_sz);
 		memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz);
 		break;
+	case IB_FLOW_SPEC_VXLAN_TUNNEL:
+		ib_filter_sz = offsetof(struct ib_flow_tunnel_filter, real_sz);
+		actual_filter_sz = spec_filter_size(kern_spec_mask,
+						    kern_filter_sz,
+						    ib_filter_sz);
+		if (actual_filter_sz <= 0)
+			return -EINVAL;
+		ib_spec->tunnel.size = sizeof(struct ib_flow_spec_tunnel);
+		memcpy(&ib_spec->tunnel.val, kern_spec_val, actual_filter_sz);
+		memcpy(&ib_spec->tunnel.mask, kern_spec_mask, actual_filter_sz);
+
+		if ((ntohl(ib_spec->tunnel.mask.tunnel_id)) >= BIT(24) ||
+		    (ntohl(ib_spec->tunnel.val.tunnel_id)) >= BIT(24))
+			return -EINVAL;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -3745,7 +3827,6 @@
 		err = PTR_ERR(flow_id);
 		goto err_free;
 	}
-	flow_id->qp = qp;
 	flow_id->uobject = uobj;
 	uobj->object = flow_id;
 
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 0012fa5..257d079 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -137,6 +137,7 @@
 	[IB_USER_VERBS_EX_CMD_DESTROY_WQ]       = ib_uverbs_ex_destroy_wq,
 	[IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table,
 	[IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table,
+	[IB_USER_VERBS_EX_CMD_MODIFY_QP]        = ib_uverbs_ex_modify_qp,
 };
 
 static void ib_uverbs_add_one(struct ib_device *device);
@@ -749,8 +750,11 @@
 	int srcu_key;
 	ssize_t ret;
 
-	if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+	if (!ib_safe_file_access(filp)) {
+		pr_err_once("uverbs_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
+			    task_tgid_vnr(current), current->comm);
 		return -EACCES;
+	}
 
 	if (count < sizeof hdr)
 		return -EINVAL;
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 8368764..71580cc 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -315,7 +315,7 @@
 {
 	struct ib_ah *ah;
 
-	ah = pd->device->create_ah(pd, ah_attr);
+	ah = pd->device->create_ah(pd, ah_attr, NULL);
 
 	if (!IS_ERR(ah)) {
 		ah->device  = pd->device;
@@ -328,7 +328,7 @@
 }
 EXPORT_SYMBOL(ib_create_ah);
 
-static int ib_get_header_version(const union rdma_network_hdr *hdr)
+int ib_get_rdma_header_version(const union rdma_network_hdr *hdr)
 {
 	const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh;
 	struct iphdr ip4h_checked;
@@ -359,6 +359,7 @@
 		return 4;
 	return 6;
 }
+EXPORT_SYMBOL(ib_get_rdma_header_version);
 
 static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
 						     u8 port_num,
@@ -369,7 +370,7 @@
 	if (rdma_protocol_ib(device, port_num))
 		return RDMA_NETWORK_IB;
 
-	grh_version = ib_get_header_version((union rdma_network_hdr *)grh);
+	grh_version = ib_get_rdma_header_version((union rdma_network_hdr *)grh);
 
 	if (grh_version == 4)
 		return RDMA_NETWORK_IPV4;
@@ -415,9 +416,9 @@
 				     &context, gid_index);
 }
 
-static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr,
-				  enum rdma_network_type net_type,
-				  union ib_gid *sgid, union ib_gid *dgid)
+int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
+			      enum rdma_network_type net_type,
+			      union ib_gid *sgid, union ib_gid *dgid)
 {
 	struct sockaddr_in  src_in;
 	struct sockaddr_in  dst_in;
@@ -447,6 +448,7 @@
 		return -EINVAL;
 	}
 }
+EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
 
 int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
 		       const struct ib_wc *wc, const struct ib_grh *grh,
@@ -469,8 +471,8 @@
 			net_type = ib_get_net_type_by_grh(device, port_num, grh);
 		gid_type = ib_network_to_gid_type(net_type);
 	}
-	ret = get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
-				     &sgid, &dgid);
+	ret = ib_get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
+					&sgid, &dgid);
 	if (ret)
 		return ret;
 
@@ -1014,6 +1016,7 @@
 						 IB_QP_QKEY),
 				 [IB_QPT_GSI] = (IB_QP_CUR_STATE		|
 						 IB_QP_QKEY),
+				 [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT,
 			 }
 		}
 	},
@@ -1047,6 +1050,7 @@
 						IB_QP_QKEY),
 				[IB_QPT_GSI] = (IB_QP_CUR_STATE			|
 						IB_QP_QKEY),
+				[IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT,
 			}
 		},
 		[IB_QPS_SQD]   = {
@@ -1196,66 +1200,66 @@
 }
 EXPORT_SYMBOL(ib_modify_qp_is_ok);
 
-int ib_resolve_eth_dmac(struct ib_qp *qp,
-			struct ib_qp_attr *qp_attr, int *qp_attr_mask)
+int ib_resolve_eth_dmac(struct ib_device *device,
+			struct ib_ah_attr *ah_attr)
 {
 	int           ret = 0;
 
-	if (*qp_attr_mask & IB_QP_AV) {
-		if (qp_attr->ah_attr.port_num < rdma_start_port(qp->device) ||
-		    qp_attr->ah_attr.port_num > rdma_end_port(qp->device))
-			return -EINVAL;
+	if (ah_attr->port_num < rdma_start_port(device) ||
+	    ah_attr->port_num > rdma_end_port(device))
+		return -EINVAL;
 
-		if (!rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))
-			return 0;
+	if (!rdma_cap_eth_ah(device, ah_attr->port_num))
+		return 0;
 
-		if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) {
-			rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw,
-					qp_attr->ah_attr.dmac);
-		} else {
-			union ib_gid		sgid;
-			struct ib_gid_attr	sgid_attr;
-			int			ifindex;
-			int			hop_limit;
+	if (rdma_link_local_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
+		rdma_get_ll_mac((struct in6_addr *)ah_attr->grh.dgid.raw,
+				ah_attr->dmac);
+	} else {
+		union ib_gid		sgid;
+		struct ib_gid_attr	sgid_attr;
+		int			ifindex;
+		int			hop_limit;
 
-			ret = ib_query_gid(qp->device,
-					   qp_attr->ah_attr.port_num,
-					   qp_attr->ah_attr.grh.sgid_index,
-					   &sgid, &sgid_attr);
+		ret = ib_query_gid(device,
+				   ah_attr->port_num,
+				   ah_attr->grh.sgid_index,
+				   &sgid, &sgid_attr);
 
-			if (ret || !sgid_attr.ndev) {
-				if (!ret)
-					ret = -ENXIO;
-				goto out;
-			}
-
-			ifindex = sgid_attr.ndev->ifindex;
-
-			ret = rdma_addr_find_l2_eth_by_grh(&sgid,
-							   &qp_attr->ah_attr.grh.dgid,
-							   qp_attr->ah_attr.dmac,
-							   NULL, &ifindex, &hop_limit);
-
-			dev_put(sgid_attr.ndev);
-
-			qp_attr->ah_attr.grh.hop_limit = hop_limit;
+		if (ret || !sgid_attr.ndev) {
+			if (!ret)
+				ret = -ENXIO;
+			goto out;
 		}
+
+		ifindex = sgid_attr.ndev->ifindex;
+
+		ret = rdma_addr_find_l2_eth_by_grh(&sgid,
+						   &ah_attr->grh.dgid,
+						   ah_attr->dmac,
+						   NULL, &ifindex, &hop_limit);
+
+		dev_put(sgid_attr.ndev);
+
+		ah_attr->grh.hop_limit = hop_limit;
 	}
 out:
 	return ret;
 }
 EXPORT_SYMBOL(ib_resolve_eth_dmac);
 
-
 int ib_modify_qp(struct ib_qp *qp,
 		 struct ib_qp_attr *qp_attr,
 		 int qp_attr_mask)
 {
-	int ret;
 
-	ret = ib_resolve_eth_dmac(qp, qp_attr, &qp_attr_mask);
-	if (ret)
-		return ret;
+	if (qp_attr_mask & IB_QP_AV) {
+		int ret;
+
+		ret = ib_resolve_eth_dmac(qp->device, &qp_attr->ah_attr);
+		if (ret)
+			return ret;
+	}
 
 	return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
 }
@@ -1734,8 +1738,10 @@
 		return ERR_PTR(-ENOSYS);
 
 	flow_id = qp->device->create_flow(qp, flow_attr, domain);
-	if (!IS_ERR(flow_id))
+	if (!IS_ERR(flow_id)) {
 		atomic_inc(&qp->usecnt);
+		flow_id->qp = qp;
+	}
 	return flow_id;
 }
 EXPORT_SYMBOL(ib_create_flow);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
index 8bca6b4..445e89e 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_dbg.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
@@ -45,10 +45,9 @@
 	int size = 32;
 
 	m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
-	if (!m) {
-		PDBG("%s couldn't allocate memory.\n", __func__);
+	if (!m)
 		return;
-	}
+
 	m->mem_id = MEM_PMRX;
 	m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base;
 	m->len = size;
@@ -82,10 +81,9 @@
 	size = npages * sizeof(u64);
 
 	m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
-	if (!m) {
-		PDBG("%s couldn't allocate memory.\n", __func__);
+	if (!m)
 		return;
-	}
+
 	m->mem_id = MEM_PMRX;
 	m->addr = pbl_addr;
 	m->len = size;
@@ -144,10 +142,9 @@
 	int rc;
 
 	m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
-	if (!m) {
-		PDBG("%s couldn't allocate memory.\n", __func__);
+	if (!m)
 		return;
-	}
+
 	m->mem_id = MEM_PMRX;
 	m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base;
 	m->len = size;
@@ -177,10 +174,9 @@
 	int rc;
 
 	m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
-	if (!m) {
-		PDBG("%s couldn't allocate memory.\n", __func__);
+	if (!m)
 		return;
-	}
+
 	m->mem_id = MEM_CM;
 	m->addr = hwtid * size;
 	m->len = size;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index cba57bb..9d5fe18 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -62,7 +62,8 @@
 #include "common.h"
 
 static struct ib_ah *iwch_ah_create(struct ib_pd *pd,
-				    struct ib_ah_attr *ah_attr)
+				    struct ib_ah_attr *ah_attr,
+				    struct ib_udata *udata)
 {
 	return ERR_PTR(-ENOSYS);
 }
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 93e3d27..cc7cf18 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -828,8 +828,10 @@
 	}
 	rdev->status_page = (struct t4_dev_status_page *)
 			    __get_free_page(GFP_KERNEL);
-	if (!rdev->status_page)
+	if (!rdev->status_page) {
+		err = -ENOMEM;
 		goto destroy_ocqp_pool;
+	}
 	rdev->status_page->qp_start = rdev->lldi.vr->qp.start;
 	rdev->status_page->qp_size = rdev->lldi.vr->qp.size;
 	rdev->status_page->cq_start = rdev->lldi.vr->cq.start;
@@ -841,8 +843,6 @@
 		if (rdev->wr_log) {
 			rdev->wr_log_size = 1 << c4iw_wr_log_size_order;
 			atomic_set(&rdev->wr_log_idx, 0);
-		} else {
-			pr_err(MOD "error allocating wr_log. Logging disabled\n");
 		}
 	}
 
@@ -1424,8 +1424,6 @@
 
 	qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC);
 	if (!qp_list.qps) {
-		printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
-		       pci_name(ctx->lldi.pdev));
 		spin_unlock_irq(&ctx->dev->lock);
 		return;
 	}
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 645e606..49b51b7 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -59,7 +59,9 @@
 MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)");
 
 static struct ib_ah *c4iw_ah_create(struct ib_pd *pd,
-				    struct ib_ah_attr *ah_attr)
+				    struct ib_ah_attr *ah_attr,
+				    struct ib_udata *udata)
+
 {
 	return ERR_PTR(-ENOSYS);
 }
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index a26a9a0..4962b6e 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -125,6 +125,7 @@
 				cpumask_weight(topology_sibling_cpumask(
 					cpumask_first(&node_affinity.proc.mask)
 					));
+	node_affinity.num_possible_nodes = num_possible_nodes();
 	node_affinity.num_online_nodes = num_online_nodes();
 	node_affinity.num_online_cpus = num_online_cpus();
 
@@ -135,7 +136,7 @@
 	 */
 	init_real_cpu_mask();
 
-	hfi1_per_node_cntr = kcalloc(num_possible_nodes(),
+	hfi1_per_node_cntr = kcalloc(node_affinity.num_possible_nodes,
 				     sizeof(*hfi1_per_node_cntr), GFP_KERNEL);
 	if (!hfi1_per_node_cntr)
 		return -ENOMEM;
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index b89ea3c..c9453b3 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -70,14 +70,6 @@
 	uint gen;
 };
 
-struct hfi1_affinity {
-	struct cpu_mask_set def_intr;
-	struct cpu_mask_set rcv_intr;
-	struct cpumask real_cpu_mask;
-	/* spin lock to protect affinity struct */
-	spinlock_t lock;
-};
-
 struct hfi1_msix_entry;
 
 /* Initialize non-HT cpu cores mask */
@@ -119,6 +111,7 @@
 	struct cpumask real_cpu_mask;
 	struct cpu_mask_set proc;
 	int num_core_siblings;
+	int num_possible_nodes;
 	int num_online_nodes;
 	int num_online_cpus;
 	struct mutex lock; /* protects affinity nodes */
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 9bf5f23..37d8af5 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -8488,7 +8488,10 @@
 	 */
 	if (type == HCMD_WRITE_LCB_CSR) {
 		in_data |= ((*out_data) & 0xffffffffffull) << 8;
-		reg = ((((*out_data) >> 40) & 0xff) <<
+		/* must preserve COMPLETED - it is tied to hardware */
+		reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_0);
+		reg &= DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK;
+		reg |= ((((*out_data) >> 40) & 0xff) <<
 				DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT)
 		      | ((((*out_data) >> 48) & 0xffff) <<
 				DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
@@ -9567,11 +9570,11 @@
 	if (HFI1_CAP_IS_KSET(EXTENDED_PSN))
 		add_rcvctrl(dd, RCV_CTRL_RCV_EXTENDED_PSN_ENABLE_SMASK);
 
-	guid = ppd->guid;
+	guid = ppd->guids[HFI1_PORT_GUID_INDEX];
 	if (!guid) {
 		if (dd->base_guid)
 			guid = dd->base_guid + ppd->port - 1;
-		ppd->guid = guid;
+		ppd->guids[HFI1_PORT_GUID_INDEX] = guid;
 	}
 
 	/* Set linkinit_reason on power up per OPA spec */
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index 5b99938..5bfa839 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -415,6 +415,9 @@
 #define ASIC_CFG_SBUS_REQUEST_DATA_IN_SHIFT 32
 #define ASIC_CFG_SBUS_REQUEST_RECEIVER_ADDR_SHIFT 0
 #define ASIC_CFG_SCRATCH (ASIC + 0x000000000020)
+#define ASIC_CFG_SCRATCH_1 (ASIC_CFG_SCRATCH + 0x08)
+#define ASIC_CFG_SCRATCH_2 (ASIC_CFG_SCRATCH + 0x10)
+#define ASIC_CFG_SCRATCH_3 (ASIC_CFG_SCRATCH + 0x18)
 #define ASIC_CFG_THERM_POLL_EN (ASIC + 0x000000000050)
 #define ASIC_EEP_ADDR_CMD (ASIC + 0x000000000308)
 #define ASIC_EEP_ADDR_CMD_EP_ADDR_MASK 0xFFFFFFull
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 632ba21..8725f4c 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -541,6 +541,114 @@
 	return ret;
 }
 
+/* read the dc8051 memory */
+static ssize_t dc8051_memory_read(struct file *file, char __user *buf,
+				  size_t count, loff_t *ppos)
+{
+	struct hfi1_pportdata *ppd = private2ppd(file);
+	ssize_t rval;
+	void *tmp;
+	loff_t start, end;
+
+	/* the checks below expect the position to be non-negative */
+	if (*ppos < 0)
+		return -EINVAL;
+
+	tmp = kzalloc(DC8051_DATA_MEM_SIZE, GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	/*
+	 * Fill in the requested portion of the temporary buffer from the
+	 * 8051 memory.  The 8051 memory read is done in terms of 8 bytes.
+	 * Adjust start and end to fit.  Skip reading anything if out of
+	 * range.
+	 */
+	start = *ppos & ~0x7;	/* round down */
+	if (start < DC8051_DATA_MEM_SIZE) {
+		end = (*ppos + count + 7) & ~0x7; /* round up */
+		if (end > DC8051_DATA_MEM_SIZE)
+			end = DC8051_DATA_MEM_SIZE;
+		rval = read_8051_data(ppd->dd, start, end - start,
+				      (u64 *)(tmp + start));
+		if (rval)
+			goto done;
+	}
+
+	rval = simple_read_from_buffer(buf, count, ppos, tmp,
+				       DC8051_DATA_MEM_SIZE);
+done:
+	kfree(tmp);
+	return rval;
+}
+
+static ssize_t debugfs_lcb_read(struct file *file, char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	struct hfi1_pportdata *ppd = private2ppd(file);
+	struct hfi1_devdata *dd = ppd->dd;
+	unsigned long total, csr_off;
+	u64 data;
+
+	if (*ppos < 0)
+		return -EINVAL;
+	/* only read 8 byte quantities */
+	if ((count % 8) != 0)
+		return -EINVAL;
+	/* offset must be 8-byte aligned */
+	if ((*ppos % 8) != 0)
+		return -EINVAL;
+	/* do nothing if out of range or zero count */
+	if (*ppos >= (LCB_END - LCB_START) || !count)
+		return 0;
+	/* reduce count if needed */
+	if (*ppos + count > LCB_END - LCB_START)
+		count = (LCB_END - LCB_START) - *ppos;
+
+	csr_off = LCB_START + *ppos;
+	for (total = 0; total < count; total += 8, csr_off += 8) {
+		if (read_lcb_csr(dd, csr_off, (u64 *)&data))
+			break; /* failed */
+		if (put_user(data, (unsigned long __user *)(buf + total)))
+			break;
+	}
+	*ppos += total;
+	return total;
+}
+
+static ssize_t debugfs_lcb_write(struct file *file, const char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	struct hfi1_pportdata *ppd = private2ppd(file);
+	struct hfi1_devdata *dd = ppd->dd;
+	unsigned long total, csr_off, data;
+
+	if (*ppos < 0)
+		return -EINVAL;
+	/* only write 8 byte quantities */
+	if ((count % 8) != 0)
+		return -EINVAL;
+	/* offset must be 8-byte aligned */
+	if ((*ppos % 8) != 0)
+		return -EINVAL;
+	/* do nothing if out of range or zero count */
+	if (*ppos >= (LCB_END - LCB_START) || !count)
+		return 0;
+	/* reduce count if needed */
+	if (*ppos + count > LCB_END - LCB_START)
+		count = (LCB_END - LCB_START) - *ppos;
+
+	csr_off = LCB_START + *ppos;
+	for (total = 0; total < count; total += 8, csr_off += 8) {
+		if (get_user(data, (unsigned long __user *)(buf + total)))
+			break;
+		if (write_lcb_csr(dd, csr_off, data))
+			break; /* failed */
+	}
+	*ppos += total;
+	return total;
+}
+
 /*
  * read the per-port QSFP data for ppd
  */
@@ -931,6 +1039,8 @@
 	DEBUGFS_XOPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write,
 		     qsfp2_debugfs_open, qsfp2_debugfs_release),
 	DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write),
+	DEBUGFS_OPS("dc8051_memory", dc8051_memory_read, NULL),
+	DEBUGFS_OPS("lcb", debugfs_lcb_read, debugfs_lcb_write),
 };
 
 static void *_sdma_cpu_list_seq_start(struct seq_file *s, loff_t *pos)
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 6563e4d..d426116 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -793,8 +793,7 @@
 				hfi1_schedule_send(qp);
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 		}
-		if (atomic_dec_and_test(&qp->refcount))
-			wake_up(&qp->wait);
+		rvt_put_qp(qp);
 	}
 }
 
diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c
index e70c223..26da124 100644
--- a/drivers/infiniband/hw/hfi1/eprom.c
+++ b/drivers/infiniband/hw/hfi1/eprom.c
@@ -207,6 +207,40 @@
 /* magic character sequence that trails an image */
 #define IMAGE_TRAIL_MAGIC "egamiAPO"
 
+/* EPROM file types */
+#define HFI1_EFT_PLATFORM_CONFIG 2
+
+/* segment size - 128 KiB */
+#define SEG_SIZE (128 * 1024)
+
+struct hfi1_eprom_footer {
+	u32 oprom_size;		/* size of the oprom, in bytes */
+	u16 num_table_entries;
+	u16 version;		/* version of this footer */
+	u32 magic;		/* must be last */
+};
+
+struct hfi1_eprom_table_entry {
+	u32 type;		/* file type */
+	u32 offset;		/* file offset from start of EPROM */
+	u32 size;		/* file size, in bytes */
+};
+
+/*
+ * Calculate the max number of table entries that will fit within a directory
+ * buffer of size 'dir_size'.
+ */
+#define MAX_TABLE_ENTRIES(dir_size) \
+	(((dir_size) - sizeof(struct hfi1_eprom_footer)) / \
+		sizeof(struct hfi1_eprom_table_entry))
+
+#define DIRECTORY_SIZE(n) (sizeof(struct hfi1_eprom_footer) + \
+	(sizeof(struct hfi1_eprom_table_entry) * (n)))
+
+#define MAGIC4(a, b, c, d) ((d) << 24 | (c) << 16 | (b) << 8 | (a))
+#define FOOTER_MAGIC MAGIC4('e', 'p', 'r', 'm')
+#define FOOTER_VERSION 1
+
 /*
  * Read all of partition 1.  The actual file is at the front.  Adjust
  * the returned size if a trailing image magic is found.
@@ -242,6 +276,167 @@
 }
 
 /*
+ * The segment magic has been checked.  There is a footer and table of
+ * contents present.
+ *
+ * directory is a u32 aligned buffer of size EP_PAGE_SIZE.
+ */
+static int read_segment_platform_config(struct hfi1_devdata *dd,
+					void *directory, void **data, u32 *size)
+{
+	struct hfi1_eprom_footer *footer;
+	struct hfi1_eprom_table_entry *table;
+	struct hfi1_eprom_table_entry *entry;
+	void *buffer = NULL;
+	void *table_buffer = NULL;
+	int ret, i;
+	u32 directory_size;
+	u32 seg_base, seg_offset;
+	u32 bytes_available, ncopied, to_copy;
+
+	/* the footer is at the end of the directory */
+	footer = (struct hfi1_eprom_footer *)
+			(directory + EP_PAGE_SIZE - sizeof(*footer));
+
+	/* make sure the structure version is supported */
+	if (footer->version != FOOTER_VERSION)
+		return -EINVAL;
+
+	/* oprom size must be smaller than a segment */
+	if (footer->oprom_size >= SEG_SIZE)
+		return -EINVAL;
+
+	/* the file table must fit in a segment with the oprom */
+	if (footer->num_table_entries >
+			MAX_TABLE_ENTRIES(SEG_SIZE - footer->oprom_size))
+		return -EINVAL;
+
+	/* find the file table start, which precedes the footer */
+	directory_size = DIRECTORY_SIZE(footer->num_table_entries);
+	if (directory_size <= EP_PAGE_SIZE) {
+		/* the file table fits into the directory buffer handed in */
+		table = (struct hfi1_eprom_table_entry *)
+				(directory + EP_PAGE_SIZE - directory_size);
+	} else {
+		/* need to allocate and read more */
+		table_buffer = kmalloc(directory_size, GFP_KERNEL);
+		if (!table_buffer)
+			return -ENOMEM;
+		ret = read_length(dd, SEG_SIZE - directory_size,
+				  directory_size, table_buffer);
+		if (ret)
+			goto done;
+		table = table_buffer;
+	}
+
+	/* look for the platform configuration file in the table */
+	for (entry = NULL, i = 0; i < footer->num_table_entries; i++) {
+		if (table[i].type == HFI1_EFT_PLATFORM_CONFIG) {
+			entry = &table[i];
+			break;
+		}
+	}
+	if (!entry) {
+		ret = -ENOENT;
+		goto done;
+	}
+
+	/*
+	 * Sanity check on the configuration file size - it should never
+	 * be larger than 4 KiB.
+	 */
+	if (entry->size > (4 * 1024)) {
+		dd_dev_err(dd, "Bad configuration file size 0x%x\n",
+			   entry->size);
+		ret = -EINVAL;
+		goto done;
+	}
+
+	/* check for bogus offset and size that wrap when added together */
+	if (entry->offset + entry->size < entry->offset) {
+		dd_dev_err(dd,
+			   "Bad configuration file start + size 0x%x+0x%x\n",
+			   entry->offset, entry->size);
+		ret = -EINVAL;
+		goto done;
+	}
+
+	/* allocate the buffer to return */
+	buffer = kmalloc(entry->size, GFP_KERNEL);
+	if (!buffer) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	/*
+	 * Extract the file by looping over segments until it is fully read.
+	 */
+	seg_offset = entry->offset % SEG_SIZE;
+	seg_base = entry->offset - seg_offset;
+	ncopied = 0;
+	while (ncopied < entry->size) {
+		/* calculate data bytes available in this segment */
+
+		/* start with the bytes from the current offset to the end */
+		bytes_available = SEG_SIZE - seg_offset;
+		/* subtract off footer and table from segment 0 */
+		if (seg_base == 0) {
+			/*
+			 * Sanity check: should not have a starting point
+			 * at or within the directory.
+			 */
+			if (bytes_available <= directory_size) {
+				dd_dev_err(dd,
+					   "Bad configuration file - offset 0x%x within footer+table\n",
+					   entry->offset);
+				ret = -EINVAL;
+				goto done;
+			}
+			bytes_available -= directory_size;
+		}
+
+		/* calculate bytes wanted */
+		to_copy = entry->size - ncopied;
+
+		/* max out at the available bytes in this segment */
+		if (to_copy > bytes_available)
+			to_copy = bytes_available;
+
+		/*
+		 * Read from the EPROM.
+		 *
+		 * The sanity check for entry->offset is done in read_length().
+		 * The EPROM offset is validated against what the hardware
+		 * addressing supports.  In addition, if the offset is larger
+		 * than the actual EPROM, it silently wraps.  It will work
+		 * fine, though the reader may not get what they expected
+		 * from the EPROM.
+		 */
+		ret = read_length(dd, seg_base + seg_offset, to_copy,
+				  buffer + ncopied);
+		if (ret)
+			goto done;
+
+		ncopied += to_copy;
+
+		/* set up for next segment */
+		seg_offset = footer->oprom_size;
+		seg_base += SEG_SIZE;
+	}
+
+	/* success */
+	ret = 0;
+	*data = buffer;
+	*size = entry->size;
+
+done:
+	kfree(table_buffer);
+	if (ret)
+		kfree(buffer);
+	return ret;
+}
+
+/*
  * Read the platform configuration file from the EPROM.
  *
  * On success, an allocated buffer containing the data and its size are
@@ -253,6 +448,7 @@
  *   -EBUSY   - not able to acquire access to the EPROM
  *   -ENOENT  - no recognizable file written
  *   -ENOMEM  - buffer could not be allocated
+ *   -EINVAL  - invalid EPROM contents found
  */
 int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size)
 {
@@ -266,21 +462,20 @@
 	if (ret)
 		return -EBUSY;
 
-	/* read the last page of P0 for the EPROM format magic */
-	ret = read_length(dd, P1_START - EP_PAGE_SIZE, EP_PAGE_SIZE, directory);
+	/* read the last page of the segment for the EPROM format magic */
+	ret = read_length(dd, SEG_SIZE - EP_PAGE_SIZE, EP_PAGE_SIZE, directory);
 	if (ret)
 		goto done;
 
-	/* last dword of P0 contains a magic indicator */
-	if (directory[EP_PAGE_DWORDS - 1] == 0) {
+	/* last dword of the segment contains a magic value */
+	if (directory[EP_PAGE_DWORDS - 1] == FOOTER_MAGIC) {
+		/* segment format */
+		ret = read_segment_platform_config(dd, directory, data, size);
+	} else {
 		/* partition format */
 		ret = read_partition_platform_config(dd, data, size);
-		goto done;
 	}
 
-	/* nothing recognized */
-	ret = -ENOENT;
-
 done:
 	release_chip_resource(dd, CR_EPROM);
 	return ret;
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index 13db8eb..0dd50cd 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -239,6 +239,16 @@
 const u8 pcie_serdes_broadcast[2] = { 0xe2, 0xe3 };
 static const u8 all_pcie_serdes_broadcast = 0xe0;
 
+static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = {
+	0,
+	SYSTEM_TABLE_MAX,
+	PORT_TABLE_MAX,
+	RX_PRESET_TABLE_MAX,
+	TX_PRESET_TABLE_MAX,
+	QSFP_ATTEN_TABLE_MAX,
+	VARIABLE_SETTINGS_TABLE_MAX
+};
+
 /* forwards */
 static void dispose_one_firmware(struct firmware_details *fdet);
 static int load_fabric_serdes_firmware(struct hfi1_devdata *dd,
@@ -263,11 +273,13 @@
 	u64 reg;
 	int count;
 
-	/* start the read at the given address */
-	reg = ((addr & DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_MASK)
-			<< DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_SHIFT)
-		| DC_DC8051_CFG_RAM_ACCESS_CTRL_READ_ENA_SMASK;
+	/* step 1: set the address, clear enable */
+	reg = (addr & DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_MASK)
+			<< DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_SHIFT;
 	write_csr(dd, DC_DC8051_CFG_RAM_ACCESS_CTRL, reg);
+	/* step 2: enable */
+	write_csr(dd, DC_DC8051_CFG_RAM_ACCESS_CTRL,
+		  reg | DC_DC8051_CFG_RAM_ACCESS_CTRL_READ_ENA_SMASK);
 
 	/* wait until ACCESS_COMPLETED is set */
 	count = 0;
@@ -707,6 +719,9 @@
 				       &dd->pcidev->dev);
 		if (err) {
 			platform_config = NULL;
+			dd_dev_err(dd,
+				   "%s: No default platform config file found\n",
+				   __func__);
 			goto done;
 		}
 		dd->platform_config.data = platform_config->data;
@@ -1761,8 +1776,17 @@
 	u32 record_idx = 0, table_type = 0, table_length_dwords = 0;
 	int ret = -EINVAL; /* assume failure */
 
+	/*
+	 * For integrated devices that did not fall back to the default file,
+	 * the SI tuning information for active channels is acquired from the
+	 * scratch register bitmap, thus there is no platform config to parse.
+	 * Skip parsing in these situations.
+	 */
+	if (is_integrated(dd) && !platform_config_load)
+		return 0;
+
 	if (!dd->platform_config.data) {
-		dd_dev_info(dd, "%s: Missing config file\n", __func__);
+		dd_dev_err(dd, "%s: Missing config file\n", __func__);
 		goto bail;
 	}
 	ptr = (u32 *)dd->platform_config.data;
@@ -1770,7 +1794,7 @@
 	magic_num = *ptr;
 	ptr++;
 	if (magic_num != PLATFORM_CONFIG_MAGIC_NUM) {
-		dd_dev_info(dd, "%s: Bad config file\n", __func__);
+		dd_dev_err(dd, "%s: Bad config file\n", __func__);
 		goto bail;
 	}
 
@@ -1797,9 +1821,9 @@
 		header1 = *ptr;
 		header2 = *(ptr + 1);
 		if (header1 != ~header2) {
-			dd_dev_info(dd, "%s: Failed validation at offset %ld\n",
-				    __func__, (ptr - (u32 *)
-					       dd->platform_config.data));
+			dd_dev_err(dd, "%s: Failed validation at offset %ld\n",
+				   __func__, (ptr - (u32 *)
+					      dd->platform_config.data));
 			goto bail;
 		}
 
@@ -1841,11 +1865,11 @@
 							table_length_dwords;
 				break;
 			default:
-				dd_dev_info(dd,
-					    "%s: Unknown data table %d, offset %ld\n",
-					    __func__, table_type,
-					    (ptr - (u32 *)
-					     dd->platform_config.data));
+				dd_dev_err(dd,
+					   "%s: Unknown data table %d, offset %ld\n",
+					   __func__, table_type,
+					   (ptr - (u32 *)
+					    dd->platform_config.data));
 				goto bail; /* We don't trust this file now */
 			}
 			pcfgcache->config_tables[table_type].table = ptr;
@@ -1865,11 +1889,11 @@
 			case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
 				break;
 			default:
-				dd_dev_info(dd,
-					    "%s: Unknown meta table %d, offset %ld\n",
-					    __func__, table_type,
-					    (ptr -
-					     (u32 *)dd->platform_config.data));
+				dd_dev_err(dd,
+					   "%s: Unknown meta table %d, offset %ld\n",
+					   __func__, table_type,
+					   (ptr -
+					    (u32 *)dd->platform_config.data));
 				goto bail; /* We don't trust this file now */
 			}
 			pcfgcache->config_tables[table_type].table_metadata =
@@ -1884,10 +1908,9 @@
 		/* Jump the table */
 		ptr += table_length_dwords;
 		if (crc != *ptr) {
-			dd_dev_info(dd, "%s: Failed CRC check at offset %ld\n",
-				    __func__, (ptr -
-					       (u32 *)
-					       dd->platform_config.data));
+			dd_dev_err(dd, "%s: Failed CRC check at offset %ld\n",
+				   __func__, (ptr -
+				   (u32 *)dd->platform_config.data));
 			goto bail;
 		}
 		/* Jump the CRC DWORD */
@@ -1901,6 +1924,84 @@
 	return ret;
 }
 
+static void get_integrated_platform_config_field(
+		struct hfi1_devdata *dd,
+		enum platform_config_table_type_encoding table_type,
+		int field_index, u32 *data)
+{
+	struct hfi1_pportdata *ppd = dd->pport;
+	u8 *cache = ppd->qsfp_info.cache;
+	u32 tx_preset = 0;
+
+	switch (table_type) {
+	case PLATFORM_CONFIG_SYSTEM_TABLE:
+		if (field_index == SYSTEM_TABLE_QSFP_POWER_CLASS_MAX)
+			*data = ppd->max_power_class;
+		else if (field_index == SYSTEM_TABLE_QSFP_ATTENUATION_DEFAULT_25G)
+			*data = ppd->default_atten;
+		break;
+	case PLATFORM_CONFIG_PORT_TABLE:
+		if (field_index == PORT_TABLE_PORT_TYPE)
+			*data = ppd->port_type;
+		else if (field_index == PORT_TABLE_LOCAL_ATTEN_25G)
+			*data = ppd->local_atten;
+		else if (field_index == PORT_TABLE_REMOTE_ATTEN_25G)
+			*data = ppd->remote_atten;
+		break;
+	case PLATFORM_CONFIG_RX_PRESET_TABLE:
+		if (field_index == RX_PRESET_TABLE_QSFP_RX_CDR_APPLY)
+			*data = (ppd->rx_preset & QSFP_RX_CDR_APPLY_SMASK) >>
+				QSFP_RX_CDR_APPLY_SHIFT;
+		else if (field_index == RX_PRESET_TABLE_QSFP_RX_EMP_APPLY)
+			*data = (ppd->rx_preset & QSFP_RX_EMP_APPLY_SMASK) >>
+				QSFP_RX_EMP_APPLY_SHIFT;
+		else if (field_index == RX_PRESET_TABLE_QSFP_RX_AMP_APPLY)
+			*data = (ppd->rx_preset & QSFP_RX_AMP_APPLY_SMASK) >>
+				QSFP_RX_AMP_APPLY_SHIFT;
+		else if (field_index == RX_PRESET_TABLE_QSFP_RX_CDR)
+			*data = (ppd->rx_preset & QSFP_RX_CDR_SMASK) >>
+				QSFP_RX_CDR_SHIFT;
+		else if (field_index == RX_PRESET_TABLE_QSFP_RX_EMP)
+			*data = (ppd->rx_preset & QSFP_RX_EMP_SMASK) >>
+				QSFP_RX_EMP_SHIFT;
+		else if (field_index == RX_PRESET_TABLE_QSFP_RX_AMP)
+			*data = (ppd->rx_preset & QSFP_RX_AMP_SMASK) >>
+				QSFP_RX_AMP_SHIFT;
+		break;
+	case PLATFORM_CONFIG_TX_PRESET_TABLE:
+		if (cache[QSFP_EQ_INFO_OFFS] & 0x4)
+			tx_preset = ppd->tx_preset_eq;
+		else
+			tx_preset = ppd->tx_preset_noeq;
+		if (field_index == TX_PRESET_TABLE_PRECUR)
+			*data = (tx_preset & TX_PRECUR_SMASK) >>
+				TX_PRECUR_SHIFT;
+		else if (field_index == TX_PRESET_TABLE_ATTN)
+			*data = (tx_preset & TX_ATTN_SMASK) >>
+				TX_ATTN_SHIFT;
+		else if (field_index == TX_PRESET_TABLE_POSTCUR)
+			*data = (tx_preset & TX_POSTCUR_SMASK) >>
+				TX_POSTCUR_SHIFT;
+		else if (field_index == TX_PRESET_TABLE_QSFP_TX_CDR_APPLY)
+			*data = (tx_preset & QSFP_TX_CDR_APPLY_SMASK) >>
+				QSFP_TX_CDR_APPLY_SHIFT;
+		else if (field_index == TX_PRESET_TABLE_QSFP_TX_EQ_APPLY)
+			*data = (tx_preset & QSFP_TX_EQ_APPLY_SMASK) >>
+				QSFP_TX_EQ_APPLY_SHIFT;
+		else if (field_index == TX_PRESET_TABLE_QSFP_TX_CDR)
+			*data = (tx_preset & QSFP_TX_CDR_SMASK) >>
+				QSFP_TX_CDR_SHIFT;
+		else if (field_index == TX_PRESET_TABLE_QSFP_TX_EQ)
+			*data = (tx_preset & QSFP_TX_EQ_SMASK) >>
+				QSFP_TX_EQ_SHIFT;
+		break;
+	case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
+	case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
+	default:
+		break;
+	}
+}
+
 static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table,
 					  int field, u32 *field_len_bits,
 					  u32 *field_start_bits)
@@ -1976,6 +2077,15 @@
 	else
 		return -EINVAL;
 
+	if (is_integrated(dd) && !platform_config_load) {
+		/*
+		 * Use saved configuration from ppd for integrated platforms
+		 */
+		get_integrated_platform_config_field(dd, table_type,
+						     field_index, data);
+		return 0;
+	}
+
 	ret = get_platform_fw_field_metadata(dd, table_type, field_index,
 					     &field_len_bits,
 					     &field_start_bits);
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 7eef11b..4163596 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -512,6 +512,9 @@
 #define HFI1_MIN_VLS_SUPPORTED 1
 #define HFI1_MAX_VLS_SUPPORTED 8
 
+#define HFI1_GUIDS_PER_PORT  5
+#define HFI1_PORT_GUID_INDEX 0
+
 static inline void incr_cntr64(u64 *cntr)
 {
 	if (*cntr < (u64)-1LL)
@@ -579,11 +582,20 @@
 	struct kobject vl2mtu_kobj;
 
 	/* PHY support */
-	u32 port_type;
 	struct qsfp_data qsfp_info;
+	/* Values for SI tuning of SerDes */
+	u32 port_type;
+	u32 tx_preset_eq;
+	u32 tx_preset_noeq;
+	u32 rx_preset;
+	u8  local_atten;
+	u8  remote_atten;
+	u8  default_atten;
+	u8  max_power_class;
 
-	/* GUID for this interface, in host order */
-	u64 guid;
+	/* GUIDs for this interface, in host order, guids[0] is a port guid */
+	u64 guids[HFI1_GUIDS_PER_PORT];
+
 	/* GUID for peer interface, in host order */
 	u64 neighbor_guid;
 
@@ -848,32 +860,29 @@
 	u8 __iomem *kregend;
 	/* physical address of chip for io_remap, etc. */
 	resource_size_t physaddr;
-	/* receive context data */
-	struct hfi1_ctxtdata **rcd;
+	/* Per VL data. Enough for all VLs but not all elements are set/used. */
+	struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
 	/* send context data */
 	struct send_context_info *send_contexts;
 	/* map hardware send contexts to software index */
 	u8 *hw_to_sw;
 	/* spinlock for allocating and releasing send context resources */
 	spinlock_t sc_lock;
-	/* Per VL data. Enough for all VLs but not all elements are set/used. */
-	struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
 	/* lock for pio_map */
 	spinlock_t pio_map_lock;
+	/* Send Context initialization lock. */
+	spinlock_t sc_init_lock;
+	/* lock for sdma_map */
+	spinlock_t                          sde_map_lock;
 	/* array of kernel send contexts */
 	struct send_context **kernel_send_context;
 	/* array of vl maps */
 	struct pio_vl_map __rcu *pio_map;
-	/* seqlock for sc2vl */
-	seqlock_t sc2vl_lock;
-	u64 sc2vl[4];
-	/* Send Context initialization lock. */
-	spinlock_t sc_init_lock;
+	/* default flags to last descriptor */
+	u64 default_desc1;
 
 	/* fields common to all SDMA engines */
 
-	/* default flags to last descriptor */
-	u64 default_desc1;
 	volatile __le64                    *sdma_heads_dma; /* DMA'ed by chip */
 	dma_addr_t                          sdma_heads_phys;
 	void                               *sdma_pad_dma; /* DMA'ed by chip */
@@ -884,8 +893,6 @@
 	u32                                 chip_sdma_engines;
 	/* num used */
 	u32                                 num_sdma;
-	/* lock for sdma_map */
-	spinlock_t                          sde_map_lock;
 	/* array of engines sized by num_sdma */
 	struct sdma_engine                 *per_sdma;
 	/* array of vl maps */
@@ -894,14 +901,11 @@
 	wait_queue_head_t		  sdma_unfreeze_wq;
 	atomic_t			  sdma_unfreeze_count;
 
+	u32 lcb_access_count;		/* count of LCB users */
+
 	/* common data between shared ASIC HFIs in this OS */
 	struct hfi1_asic_data *asic_data;
 
-	/* hfi1_pportdata, points to array of (physical) port-specific
-	 * data structs, indexed by pidx (0..n-1)
-	 */
-	struct hfi1_pportdata *pport;
-
 	/* mem-mapped pointer to base of PIO buffers */
 	void __iomem *piobase;
 	/*
@@ -918,20 +922,13 @@
 	/* send context numbers and sizes for each type */
 	struct sc_config_sizes sc_sizes[SC_MAX];
 
-	u32 lcb_access_count;		/* count of LCB users */
-
 	char *boardname; /* human readable board info */
 
-	/* device (not port) flags, basically device capabilities */
-	u32 flags;
-
 	/* reset value */
 	u64 z_int_counter;
 	u64 z_rcv_limit;
 	u64 z_send_schedule;
-	/* percpu int_counter */
-	u64 __percpu *int_counter;
-	u64 __percpu *rcv_limit;
+
 	u64 __percpu *send_schedule;
 	/* number of receive contexts in use by the driver */
 	u32 num_rcv_contexts;
@@ -946,6 +943,7 @@
 	/* base receive interrupt timeout, in CSR units */
 	u32 rcv_intr_timeout_csr;
 
+	u32 freezelen; /* max length of freezemsg */
 	u64 __iomem *egrtidbase;
 	spinlock_t sendctrl_lock; /* protect changes to SendCtrl */
 	spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */
@@ -967,7 +965,6 @@
 	 * IB link status cheaply
 	 */
 	struct hfi1_status *status;
-	u32 freezelen; /* max length of freezemsg */
 
 	/* revision register shadow */
 	u64 revision;
@@ -995,6 +992,8 @@
 	u16 rcvegrbufsize_shift;
 	/* both sides of the PCIe link are gen3 capable */
 	u8 link_gen3_capable;
+	/* default link down value (poll/sleep) */
+	u8 link_default;
 	/* localbus width (1, 2,4,8,16,32) from config space  */
 	u32 lbus_width;
 	/* localbus speed in MHz */
@@ -1030,8 +1029,6 @@
 	u8 hfi1_id;
 	/* implementation code */
 	u8 icode;
-	/* default link down value (poll/sleep) */
-	u8 link_default;
 	/* vAU of this device */
 	u8 vau;
 	/* vCU of this device */
@@ -1042,27 +1039,17 @@
 	u16 vl15_init;
 
 	/* Misc small ints */
-	/* Number of physical ports available */
-	u8 num_pports;
-	/* Lowest context number which can be used by user processes */
-	u8 first_user_ctxt;
 	u8 n_krcv_queues;
 	u8 qos_shift;
-	u8 qpn_mask;
 
-	u16 rhf_offset; /* offset of RHF within receive header entry */
 	u16 irev;	/* implementation revision */
 	u16 dc8051_ver; /* 8051 firmware version */
 
+	spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
 	struct platform_config platform_config;
 	struct platform_config_cache pcfg_cache;
 
 	struct diag_client *diag_client;
-	spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
-
-	u8 psxmitwait_supported;
-	/* cycle length of PS* counters in HW (in picoseconds) */
-	u16 psxmitwait_check_rate;
 
 	/* MSI-X information */
 	struct hfi1_msix_entry *msix_entries;
@@ -1077,6 +1064,9 @@
 
 	struct rcv_array_data rcv_entries;
 
+	/* cycle length of PS* counters in HW (in picoseconds) */
+	u16 psxmitwait_check_rate;
+
 	/*
 	 * 64 bit synthetic counters
 	 */
@@ -1109,11 +1099,11 @@
 	struct err_info_rcvport err_info_rcvport;
 	struct err_info_constraint err_info_rcv_constraint;
 	struct err_info_constraint err_info_xmit_constraint;
-	u8 err_info_uncorrectable;
-	u8 err_info_fmconfig;
 
 	atomic_t drop_packet;
 	u8 do_drop;
+	u8 err_info_uncorrectable;
+	u8 err_info_fmconfig;
 
 	/*
 	 * Software counters for the status bits defined by the
@@ -1136,47 +1126,70 @@
 	u64 sw_cce_err_status_aggregate;
 	/* Software counter that aggregates all bypass packet rcv errors */
 	u64 sw_rcv_bypass_packet_errors;
-	/* receive interrupt functions */
-	rhf_rcv_function_ptr *rhf_rcv_function_map;
+	/* receive interrupt function */
 	rhf_rcv_function_ptr normal_rhf_rcv_functions[8];
 
+	/* Save the enabled LCB error bits */
+	u64 lcb_err_en;
+
 	/*
 	 * Handlers for outgoing data so that snoop/capture does not
 	 * have to have its hooks in the send path
 	 */
-	send_routine process_pio_send;
+	send_routine process_pio_send ____cacheline_aligned_in_smp;
 	send_routine process_dma_send;
 	void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
 				u64 pbc, const void *from, size_t count);
+	/* hfi1_pportdata, points to array of (physical) port-specific
+	 * data structs, indexed by pidx (0..n-1)
+	 */
+	struct hfi1_pportdata *pport;
+	/* receive context data */
+	struct hfi1_ctxtdata **rcd;
+	u64 __percpu *int_counter;
+	/* device (not port) flags, basically device capabilities */
+	u16 flags;
+	/* Number of physical ports available */
+	u8 num_pports;
+	/* Lowest context number which can be used by user processes */
+	u8 first_user_ctxt;
+	/* adding a new field here would make it part of this cacheline */
+
+	/* seqlock for sc2vl */
+	seqlock_t sc2vl_lock ____cacheline_aligned_in_smp;
+	u64 sc2vl[4];
+	/* receive interrupt functions */
+	rhf_rcv_function_ptr *rhf_rcv_function_map;
+	u64 __percpu *rcv_limit;
+	u16 rhf_offset; /* offset of RHF within receive header entry */
+	/* adding a new field here would make it part of this cacheline */
 
 	/* OUI comes from the HW. Used everywhere as 3 separate bytes. */
 	u8 oui1;
 	u8 oui2;
 	u8 oui3;
+	u8 dc_shutdown;
+
 	/* Timer and counter used to detect RcvBufOvflCnt changes */
 	struct timer_list rcverr_timer;
-	u32 rcv_ovfl_cnt;
 
 	wait_queue_head_t event_queue;
 
-	/* Save the enabled LCB error bits */
-	u64 lcb_err_en;
-	u8 dc_shutdown;
-
 	/* receive context tail dummy address */
 	__le64 *rcvhdrtail_dummy_kvaddr;
 	dma_addr_t rcvhdrtail_dummy_dma;
 
-	bool eprom_available;	/* true if EPROM is available for this device */
-	bool aspm_supported;	/* Does HW support ASPM */
-	bool aspm_enabled;	/* ASPM state: enabled/disabled */
+	u32 rcv_ovfl_cnt;
 	/* Serialize ASPM enable/disable between multiple verbs contexts */
 	spinlock_t aspm_lock;
 	/* Number of verbs contexts which have disabled ASPM */
 	atomic_t aspm_disabled_cnt;
 
-	struct hfi1_affinity *affinity;
+	bool eprom_available;	/* true if EPROM is available for this device */
+	bool aspm_supported;	/* Does HW support ASPM */
+	bool aspm_enabled;	/* ASPM state: enabled/disabled */
 	struct rhashtable sdma_rht;
+
 	struct kobject kobj;
 };
 
@@ -1633,6 +1646,17 @@
 }
 
 /*
+ * Return the indexed GUID from the port GUIDs table.
+ */
+static inline __be64 get_sguid(struct hfi1_ibport *ibp, unsigned int index)
+{
+	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+
+	WARN_ON(index >= HFI1_GUIDS_PER_PORT);
+	return cpu_to_be64(ppd->guids[index]);
+}
+
+/*
  * Called by readers of cc_state only, must call under rcu_read_lock().
  */
 static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd)
@@ -2003,6 +2027,12 @@
 	return i2c_target(dd->hfi1_id);
 }
 
+/* Is this device integrated or discrete? */
+static inline bool is_integrated(struct hfi1_devdata *dd)
+{
+	return dd->pcidev->device == PCI_DEVICE_ID_INTEL1;
+}
+
 int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
 
 #define DD_DEV_ENTRY(dd)       __string(dev, dev_name(&(dd)->pcidev->dev))
diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index 2ec6ef3..d9740dd 100644
--- a/drivers/infiniband/hw/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
@@ -64,6 +64,7 @@
 /**
  * struct iowait - linkage for delayed progress/waiting
  * @list: used to add/insert into QP/PQ wait lists
+ * @lock: used to record the list head lock
  * @tx_head: overflow list of sdma_txreq's
  * @sleep: no space callback
  * @wakeup: space callback wakeup
@@ -91,6 +92,11 @@
  * so sleeping is not allowed.
  *
  * The wait_dma member along with the iow
+ *
+ * The lock field is used by waiters to record
+ * the seqlock_t that guards the list head.
+ * Waiters explicitly know that, but the destroy
+ * code that unwaits QPs does not.
  */
 
 struct iowait {
@@ -103,6 +109,7 @@
 		unsigned seq);
 	void (*wakeup)(struct iowait *wait, int reason);
 	void (*sdma_drained)(struct iowait *wait);
+	seqlock_t *lock;
 	struct work_struct iowork;
 	wait_queue_head_t wait_dma;
 	wait_queue_head_t wait_pio;
@@ -141,6 +148,7 @@
 	void (*sdma_drained)(struct iowait *wait))
 {
 	wait->count = 0;
+	wait->lock = NULL;
 	INIT_LIST_HEAD(&wait->list);
 	INIT_LIST_HEAD(&wait->tx_head);
 	INIT_WORK(&wait->iowork, func);
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 320e4da..6e595af 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -336,20 +336,20 @@
 	ni = (struct opa_node_info *)data;
 
 	/* GUID 0 is illegal */
-	if (am || pidx >= dd->num_pports || dd->pport[pidx].guid == 0) {
+	if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 ||
+	    get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)smp);
 	}
 
-	ni->port_guid = cpu_to_be64(dd->pport[pidx].guid);
+	ni->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
 	ni->base_version = OPA_MGMT_BASE_VERSION;
 	ni->class_version = OPA_SM_CLASS_VERSION;
 	ni->node_type = 1;     /* channel adapter */
 	ni->num_ports = ibdev->phys_port_cnt;
 	/* This is already in network order */
 	ni->system_image_guid = ib_hfi1_sys_image_guid;
-	/* Use first-port GUID as node */
-	ni->node_guid = cpu_to_be64(dd->pport->guid);
+	ni->node_guid = ibdev->node_guid;
 	ni->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
 	ni->device_id = cpu_to_be16(dd->pcidev->device);
 	ni->revision = cpu_to_be32(dd->minrev);
@@ -373,19 +373,20 @@
 
 	/* GUID 0 is illegal */
 	if (smp->attr_mod || pidx >= dd->num_pports ||
-	    dd->pport[pidx].guid == 0)
+	    ibdev->node_guid == 0 ||
+	    get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
 		smp->status |= IB_SMP_INVALID_FIELD;
-	else
-		nip->port_guid = cpu_to_be64(dd->pport[pidx].guid);
+		return reply((struct ib_mad_hdr *)smp);
+	}
 
+	nip->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
 	nip->base_version = OPA_MGMT_BASE_VERSION;
 	nip->class_version = OPA_SM_CLASS_VERSION;
 	nip->node_type = 1;     /* channel adapter */
 	nip->num_ports = ibdev->phys_port_cnt;
 	/* This is already in network order */
 	nip->sys_guid = ib_hfi1_sys_image_guid;
-	 /* Use first-port GUID as node */
-	nip->node_guid = cpu_to_be64(dd->pport->guid);
+	nip->node_guid = ibdev->node_guid;
 	nip->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
 	nip->device_id = cpu_to_be16(dd->pcidev->device);
 	nip->revision = cpu_to_be32(dd->minrev);
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index 7ad3089..ccbf52c 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -81,7 +81,7 @@
 		      struct list_head *del_list);
 static void handle_remove(struct work_struct *work);
 
-static struct mmu_notifier_ops mn_opts = {
+static const struct mmu_notifier_ops mn_opts = {
 	.invalidate_page = mmu_notifier_page,
 	.invalidate_range_start = mmu_notifier_range_start,
 };
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 50a3a36..64c9eeb 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -765,6 +765,7 @@
 	sc->hw_context = hw_context;
 	cr_group_addresses(sc, &dma);
 	sc->credits = sci->credits;
+	sc->size = sc->credits * PIO_BLOCK_SIZE;
 
 /* PIO Send Memory Address details */
 #define PIO_ADDR_CONTEXT_MASK 0xfful
@@ -1249,6 +1250,7 @@
 	sc->free = 0;
 	sc->alloc_free = 0;
 	sc->fill = 0;
+	sc->fill_wrap = 0;
 	sc->sr_head = 0;
 	sc->sr_tail = 0;
 	sc->flags = 0;
@@ -1392,7 +1394,7 @@
 	unsigned long flags;
 	unsigned long avail;
 	unsigned long blocks = dwords_to_blocks(dw_len);
-	unsigned long start_fill;
+	u32 fill_wrap;
 	int trycount = 0;
 	u32 head, next;
 
@@ -1417,9 +1419,7 @@
 			(sc->fill - sc->alloc_free);
 		if (blocks > avail) {
 			/* still no room, actively update */
-			spin_unlock_irqrestore(&sc->alloc_lock, flags);
 			sc_release_update(sc);
-			spin_lock_irqsave(&sc->alloc_lock, flags);
 			sc->alloc_free = ACCESS_ONCE(sc->free);
 			trycount++;
 			goto retry;
@@ -1435,8 +1435,11 @@
 	head = sc->sr_head;
 
 	/* "allocate" the buffer */
-	start_fill = sc->fill;
 	sc->fill += blocks;
+	fill_wrap = sc->fill_wrap;
+	sc->fill_wrap += blocks;
+	if (sc->fill_wrap >= sc->credits)
+		sc->fill_wrap = sc->fill_wrap - sc->credits;
 
 	/*
 	 * Fill the parts that the releaser looks at before moving the head.
@@ -1465,11 +1468,8 @@
 	spin_unlock_irqrestore(&sc->alloc_lock, flags);
 
 	/* finish filling in the buffer outside the lock */
-	pbuf->start = sc->base_addr + ((start_fill % sc->credits)
-							* PIO_BLOCK_SIZE);
-	pbuf->size = sc->credits * PIO_BLOCK_SIZE;
-	pbuf->end = sc->base_addr + pbuf->size;
-	pbuf->block_count = blocks;
+	pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE;
+	pbuf->end = sc->base_addr + sc->size;
 	pbuf->qw_written = 0;
 	pbuf->carry_bytes = 0;
 	pbuf->carry.val64 = 0;
@@ -1580,6 +1580,7 @@
 		qp = iowait_to_qp(wait);
 		priv = qp->priv;
 		list_del_init(&priv->s_iowait.list);
+		priv->s_iowait.lock = NULL;
 		/* refcount held until actual wake up */
 		qps[n++] = qp;
 	}
@@ -2035,29 +2036,17 @@
 int init_credit_return(struct hfi1_devdata *dd)
 {
 	int ret;
-	int num_numa;
 	int i;
 
-	num_numa = num_online_nodes();
-	/* enforce the expectation that the numas are compact */
-	for (i = 0; i < num_numa; i++) {
-		if (!node_online(i)) {
-			dd_dev_err(dd, "NUMA nodes are not compact\n");
-			ret = -EINVAL;
-			goto done;
-		}
-	}
-
 	dd->cr_base = kcalloc(
-		num_numa,
+		node_affinity.num_possible_nodes,
 		sizeof(struct credit_return_base),
 		GFP_KERNEL);
 	if (!dd->cr_base) {
-		dd_dev_err(dd, "Unable to allocate credit return base\n");
 		ret = -ENOMEM;
 		goto done;
 	}
-	for (i = 0; i < num_numa; i++) {
+	for_each_node_with_cpus(i) {
 		int bytes = TXE_NUM_CONTEXTS * sizeof(struct credit_return);
 
 		set_dev_node(&dd->pcidev->dev, i);
@@ -2084,14 +2073,11 @@
 
 void free_credit_return(struct hfi1_devdata *dd)
 {
-	int num_numa;
 	int i;
 
 	if (!dd->cr_base)
 		return;
-
-	num_numa = num_online_nodes();
-	for (i = 0; i < num_numa; i++) {
+	for (i = 0; i < node_affinity.num_possible_nodes; i++) {
 		if (dd->cr_base[i].va) {
 			dma_free_coherent(&dd->pcidev->dev,
 					  TXE_NUM_CONTEXTS *
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index e709eaf..867e5ff 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -83,53 +83,55 @@
 	void *arg;		/* argument for cb */
 	void __iomem *start;	/* buffer start address */
 	void __iomem *end;	/* context end address */
-	unsigned long size;	/* context size, in bytes */
 	unsigned long sent_at;	/* buffer is sent when <= free */
-	u32 block_count;	/* size of buffer, in blocks */
-	u32 qw_written;		/* QW written so far */
-	u32 carry_bytes;	/* number of valid bytes in carry */
 	union mix carry;	/* pending unwritten bytes */
+	u16 qw_written;		/* QW written so far */
+	u8 carry_bytes;	/* number of valid bytes in carry */
 };
 
 /* cache line aligned pio buffer array */
 union pio_shadow_ring {
 	struct pio_buf pbuf;
-	u64 unused[16];		/* cache line spacer */
 } ____cacheline_aligned;
 
 /* per-NUMA send context */
 struct send_context {
 	/* read-only after init */
 	struct hfi1_devdata *dd;		/* device */
-	void __iomem *base_addr;	/* start of PIO memory */
 	union pio_shadow_ring *sr;	/* shadow ring */
+	void __iomem *base_addr;	/* start of PIO memory */
+	u32 __percpu *buffers_allocated;/* count of buffers allocated */
+	u32 size;			/* context size, in bytes */
 
-	volatile __le64 *hw_free;	/* HW free counter */
-	struct work_struct halt_work;	/* halted context work queue entry */
-	unsigned long flags;		/* flags */
 	int node;			/* context home node */
-	int type;			/* context type */
-	u32 sw_index;			/* software index number */
-	u32 hw_context;			/* hardware context number */
-	u32 credits;			/* number of blocks in context */
 	u32 sr_size;			/* size of the shadow ring */
-	u32 group;			/* credit return group */
+	u16 flags;			/* flags */
+	u8  type;			/* context type */
+	u8  sw_index;			/* software index number */
+	u8  hw_context;			/* hardware context number */
+	u8  group;			/* credit return group */
+
 	/* allocator fields */
 	spinlock_t alloc_lock ____cacheline_aligned_in_smp;
+	u32 sr_head;			/* shadow ring head */
 	unsigned long fill;		/* official alloc count */
 	unsigned long alloc_free;	/* copy of free (less cache thrash) */
-	u32 sr_head;			/* shadow ring head */
+	u32 fill_wrap;			/* tracks fill within ring */
+	u32 credits;			/* number of blocks in context */
+	/* adding a new field here would make it part of this cacheline */
+
 	/* releaser fields */
 	spinlock_t release_lock ____cacheline_aligned_in_smp;
-	unsigned long free;		/* official free count */
 	u32 sr_tail;			/* shadow ring tail */
+	unsigned long free;		/* official free count */
+	volatile __le64 *hw_free;	/* HW free counter */
 	/* list for PIO waiters */
 	struct list_head piowait  ____cacheline_aligned_in_smp;
 	spinlock_t credit_ctrl_lock ____cacheline_aligned_in_smp;
-	u64 credit_ctrl;		/* cache for credit control */
 	u32 credit_intr_count;		/* count of credit intr users */
-	u32 __percpu *buffers_allocated;/* count of buffers allocated */
+	u64 credit_ctrl;		/* cache for credit control */
 	wait_queue_head_t halt_wait;    /* wait until kernel sees interrupt */
+	struct work_struct halt_work;	/* halted context work queue entry */
 };
 
 /* send context flags */
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index aa77736..03024ce 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -129,8 +129,8 @@
 				dest += sizeof(u64);
 			}
 
-			dest -= pbuf->size;
-			dend -= pbuf->size;
+			dest -= pbuf->sc->size;
+			dend -= pbuf->sc->size;
 		}
 
 		/* write 8-byte non-SOP, non-wrap chunk data */
@@ -361,8 +361,8 @@
 				dest += sizeof(u64);
 			}
 
-			dest -= pbuf->size;
-			dend -= pbuf->size;
+			dest -= pbuf->sc->size;
+			dend -= pbuf->sc->size;
 		}
 
 		/* write 8-byte non-SOP, non-wrap chunk data */
@@ -458,8 +458,8 @@
 			dest += sizeof(u64);
 		}
 
-		dest -= pbuf->size;
-		dend -= pbuf->size;
+		dest -= pbuf->sc->size;
+		dend -= pbuf->sc->size;
 	}
 
 	/* write 8-byte non-SOP, non-wrap chunk data */
@@ -492,7 +492,7 @@
 		 */
 		/* adjust if we have wrapped */
 		if (dest >= pbuf->end)
-			dest -= pbuf->size;
+			dest -= pbuf->sc->size;
 		/* jump to the SOP range if within the first block */
 		else if (pbuf->qw_written < PIO_BLOCK_QWS)
 			dest += SOP_DISTANCE;
@@ -584,8 +584,8 @@
 			dest += sizeof(u64);
 		}
 
-		dest -= pbuf->size;
-		dend -= pbuf->size;
+		dest -= pbuf->sc->size;
+		dend -= pbuf->sc->size;
 	}
 
 	/* write 8-byte non-SOP, non-wrap chunk data */
@@ -666,7 +666,7 @@
 			 */
 			/* adjust if we've wrapped */
 			if (dest >= pbuf->end)
-				dest -= pbuf->size;
+				dest -= pbuf->sc->size;
 			/* jump to SOP range if within the first block */
 			else if (pbuf->qw_written < PIO_BLOCK_QWS)
 				dest += SOP_DISTANCE;
@@ -719,7 +719,7 @@
 	 */
 	/* adjust if we have wrapped */
 	if (dest >= pbuf->end)
-		dest -= pbuf->size;
+		dest -= pbuf->sc->size;
 	/* jump to the SOP range if within the first block */
 	else if (pbuf->qw_written < PIO_BLOCK_QWS)
 		dest += SOP_DISTANCE;
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index 2024331..838fe84 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -49,6 +49,90 @@
 #include "efivar.h"
 #include "eprom.h"
 
+static int validate_scratch_checksum(struct hfi1_devdata *dd)
+{
+	u64 checksum = 0, temp_scratch = 0;
+	int i, j, version;
+
+	temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH);
+	version = (temp_scratch & BITMAP_VERSION_SMASK) >> BITMAP_VERSION_SHIFT;
+
+	/* Prevent power on default of all zeroes from passing checksum */
+	if (!version)
+		return 0;
+
+	/*
+	 * ASIC scratch 0 only contains the checksum and bitmap version as
+	 * fields of interest, both of which are handled separately from the
+	 * loop below, so skip it
+	 */
+	checksum += version;
+	for (i = 1; i < ASIC_NUM_SCRATCH; i++) {
+		temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH + (8 * i));
+		for (j = sizeof(u64); j != 0; j -= 2) {
+			checksum += (temp_scratch & 0xFFFF);
+			temp_scratch >>= 16;
+		}
+	}
+
+	while (checksum >> 16)
+		checksum = (checksum & CHECKSUM_MASK) + (checksum >> 16);
+
+	temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH);
+	temp_scratch &= CHECKSUM_SMASK;
+	temp_scratch >>= CHECKSUM_SHIFT;
+
+	if (checksum + temp_scratch == 0xFFFF)
+		return 1;
+	return 0;
+}
+
+static void save_platform_config_fields(struct hfi1_devdata *dd)
+{
+	struct hfi1_pportdata *ppd = dd->pport;
+	u64 temp_scratch = 0, temp_dest = 0;
+
+	temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH_1);
+
+	temp_dest = temp_scratch &
+		    (dd->hfi1_id ? PORT1_PORT_TYPE_SMASK :
+		     PORT0_PORT_TYPE_SMASK);
+	ppd->port_type = temp_dest >>
+			 (dd->hfi1_id ? PORT1_PORT_TYPE_SHIFT :
+			  PORT0_PORT_TYPE_SHIFT);
+
+	temp_dest = temp_scratch &
+		    (dd->hfi1_id ? PORT1_LOCAL_ATTEN_SMASK :
+		     PORT0_LOCAL_ATTEN_SMASK);
+	ppd->local_atten = temp_dest >>
+			   (dd->hfi1_id ? PORT1_LOCAL_ATTEN_SHIFT :
+			    PORT0_LOCAL_ATTEN_SHIFT);
+
+	temp_dest = temp_scratch &
+		    (dd->hfi1_id ? PORT1_REMOTE_ATTEN_SMASK :
+		     PORT0_REMOTE_ATTEN_SMASK);
+	ppd->remote_atten = temp_dest >>
+			    (dd->hfi1_id ? PORT1_REMOTE_ATTEN_SHIFT :
+			     PORT0_REMOTE_ATTEN_SHIFT);
+
+	temp_dest = temp_scratch &
+		    (dd->hfi1_id ? PORT1_DEFAULT_ATTEN_SMASK :
+		     PORT0_DEFAULT_ATTEN_SMASK);
+	ppd->default_atten = temp_dest >>
+			     (dd->hfi1_id ? PORT1_DEFAULT_ATTEN_SHIFT :
+			      PORT0_DEFAULT_ATTEN_SHIFT);
+
+	temp_scratch = read_csr(dd, dd->hfi1_id ? ASIC_CFG_SCRATCH_3 :
+				ASIC_CFG_SCRATCH_2);
+
+	ppd->tx_preset_eq = (temp_scratch & TX_EQ_SMASK) >> TX_EQ_SHIFT;
+	ppd->tx_preset_noeq = (temp_scratch & TX_NO_EQ_SMASK) >> TX_NO_EQ_SHIFT;
+	ppd->rx_preset = (temp_scratch & RX_SMASK) >> RX_SHIFT;
+
+	ppd->max_power_class = (temp_scratch & QSFP_MAX_POWER_SMASK) >>
+				QSFP_MAX_POWER_SHIFT;
+}
+
 void get_platform_config(struct hfi1_devdata *dd)
 {
 	int ret = 0;
@@ -56,38 +140,49 @@
 	u8 *temp_platform_config = NULL;
 	u32 esize;
 
-	ret = eprom_read_platform_config(dd, (void **)&temp_platform_config,
-					 &esize);
-	if (!ret) {
-		/* success */
-		size = esize;
-		goto success;
+	if (is_integrated(dd)) {
+		if (validate_scratch_checksum(dd)) {
+			save_platform_config_fields(dd);
+			return;
+		}
+		dd_dev_err(dd, "%s: Config bitmap corrupted/uninitialized\n",
+			   __func__);
+		dd_dev_err(dd,
+			   "%s: Please update your BIOS to support active channels\n",
+			   __func__);
+	} else {
+		ret = eprom_read_platform_config(dd,
+						 (void **)&temp_platform_config,
+						 &esize);
+		if (!ret) {
+			/* success */
+			dd->platform_config.data = temp_platform_config;
+			dd->platform_config.size = esize;
+			return;
+		}
+		/* fail, try EFI variable */
+
+		ret = read_hfi1_efi_var(dd, "configuration", &size,
+					(void **)&temp_platform_config);
+		if (!ret) {
+			dd->platform_config.data = temp_platform_config;
+			dd->platform_config.size = size;
+			return;
+		}
 	}
-	/* fail, try EFI variable */
-
-	ret = read_hfi1_efi_var(dd, "configuration", &size,
-				(void **)&temp_platform_config);
-	if (!ret)
-		goto success;
-
-	dd_dev_info(dd,
-		    "%s: Failed to get platform config from UEFI, falling back to request firmware\n",
-		    __func__);
+	dd_dev_err(dd,
+		   "%s: Failed to get platform config, falling back to sub-optimal default file\n",
+		   __func__);
 	/* fall back to request firmware */
 	platform_config_load = 1;
-	return;
-
-success:
-	dd->platform_config.data = temp_platform_config;
-	dd->platform_config.size = size;
 }
 
 void free_platform_config(struct hfi1_devdata *dd)
 {
 	if (!platform_config_load) {
 		/*
-		 * was loaded from EFI, release memory
-		 * allocated by read_efi_var
+		 * was loaded from EFI or the EPROM, release memory
+		 * allocated by read_efi_var/eprom_read_platform_config
 		 */
 		kfree(dd->platform_config.data);
 	}
@@ -100,12 +195,16 @@
 void get_port_type(struct hfi1_pportdata *ppd)
 {
 	int ret;
+	u32 temp;
 
 	ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
-					PORT_TABLE_PORT_TYPE, &ppd->port_type,
+					PORT_TABLE_PORT_TYPE, &temp,
 					4);
-	if (ret)
+	if (ret) {
 		ppd->port_type = PORT_TYPE_UNKNOWN;
+		return;
+	}
+	ppd->port_type = temp;
 }
 
 int set_qsfp_tx(struct hfi1_pportdata *ppd, int on)
@@ -538,6 +637,38 @@
 	}
 }
 
+/*
+ * Return a special SerDes setting for low power AOC cables.  The power class
+ * threshold and setting being used were all found by empirical testing.
+ *
+ * Summary of the logic:
+ *
+ * if (QSFP and QSFP_TYPE == AOC and QSFP_POWER_CLASS < 4)
+ *     return 0xe
+ * return 0; // leave at default
+ */
+static u8 aoc_low_power_setting(struct hfi1_pportdata *ppd)
+{
+	u8 *cache = ppd->qsfp_info.cache;
+	int power_class;
+
+	/* QSFP only */
+	if (ppd->port_type != PORT_TYPE_QSFP)
+		return 0; /* leave at default */
+
+	/* active optical cables only */
+	switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) {
+	case 0x0 ... 0x9: /* fallthrough */
+	case 0xC: /* fallthrough */
+	case 0xE:
+		/* active AOC */
+		power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);
+		if (power_class < QSFP_POWER_CLASS_4)
+			return 0xe;
+	}
+	return 0; /* leave at default */
+}
+
 static void apply_tunings(
 		struct hfi1_pportdata *ppd, u32 tx_preset_index,
 		u8 tuning_method, u32 total_atten, u8 limiting_active)
@@ -606,7 +737,17 @@
 		tx_preset_index, TX_PRESET_TABLE_POSTCUR, &tx_preset, 4);
 	postcur = tx_preset;
 
-	config_data = precur | (attn << 8) | (postcur << 16);
+	/*
+	 * NOTES:
+	 * o The aoc_low_power_setting is applied to all lanes even
+	 *   though only lane 0's value is examined by the firmware.
+	 * o A lingering low power setting after a cable swap does
+	 *   not occur.  On cable unplug the 8051 is reset and
+	 *   restarted on cable insert.  This resets all settings to
+	 *   their default, erasing any previous low power setting.
+	 */
+	config_data = precur | (attn << 8) | (postcur << 16) |
+			(aoc_low_power_setting(ppd) << 24);
 
 	apply_tx_lanes(ppd, TX_EQ_SETTINGS, config_data,
 		       "Applying TX settings");
diff --git a/drivers/infiniband/hw/hfi1/platform.h b/drivers/infiniband/hw/hfi1/platform.h
index e2c2161..eed0aa9 100644
--- a/drivers/infiniband/hw/hfi1/platform.h
+++ b/drivers/infiniband/hw/hfi1/platform.h
@@ -168,16 +168,6 @@
 	struct platform_config_data config_tables[PLATFORM_CONFIG_TABLE_MAX];
 };
 
-static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = {
-	0,
-	SYSTEM_TABLE_MAX,
-	PORT_TABLE_MAX,
-	RX_PRESET_TABLE_MAX,
-	TX_PRESET_TABLE_MAX,
-	QSFP_ATTEN_TABLE_MAX,
-	VARIABLE_SETTINGS_TABLE_MAX
-};
-
 /* This section defines default values and encodings for the
  * fields defined for each table above
  */
@@ -295,6 +285,123 @@
 	OPA_UNKNOWN_TUNING
 };
 
+/*
+ * Shifts and masks for the link SI tuning values stuffed into the ASIC scratch
+ * registers for integrated platforms
+ */
+#define PORT0_PORT_TYPE_SHIFT		0
+#define PORT0_LOCAL_ATTEN_SHIFT		4
+#define PORT0_REMOTE_ATTEN_SHIFT	10
+#define PORT0_DEFAULT_ATTEN_SHIFT	32
+
+#define PORT1_PORT_TYPE_SHIFT		16
+#define PORT1_LOCAL_ATTEN_SHIFT		20
+#define PORT1_REMOTE_ATTEN_SHIFT	26
+#define PORT1_DEFAULT_ATTEN_SHIFT	40
+
+#define PORT0_PORT_TYPE_MASK		0xFUL
+#define PORT0_LOCAL_ATTEN_MASK		0x3FUL
+#define PORT0_REMOTE_ATTEN_MASK		0x3FUL
+#define PORT0_DEFAULT_ATTEN_MASK	0xFFUL
+
+#define PORT1_PORT_TYPE_MASK		0xFUL
+#define PORT1_LOCAL_ATTEN_MASK		0x3FUL
+#define PORT1_REMOTE_ATTEN_MASK		0x3FUL
+#define PORT1_DEFAULT_ATTEN_MASK	0xFFUL
+
+#define PORT0_PORT_TYPE_SMASK		(PORT0_PORT_TYPE_MASK << \
+					 PORT0_PORT_TYPE_SHIFT)
+#define PORT0_LOCAL_ATTEN_SMASK		(PORT0_LOCAL_ATTEN_MASK << \
+					 PORT0_LOCAL_ATTEN_SHIFT)
+#define PORT0_REMOTE_ATTEN_SMASK	(PORT0_REMOTE_ATTEN_MASK << \
+					 PORT0_REMOTE_ATTEN_SHIFT)
+#define PORT0_DEFAULT_ATTEN_SMASK	(PORT0_DEFAULT_ATTEN_MASK << \
+					 PORT0_DEFAULT_ATTEN_SHIFT)
+
+#define PORT1_PORT_TYPE_SMASK		(PORT1_PORT_TYPE_MASK << \
+					 PORT1_PORT_TYPE_SHIFT)
+#define PORT1_LOCAL_ATTEN_SMASK		(PORT1_LOCAL_ATTEN_MASK << \
+					 PORT1_LOCAL_ATTEN_SHIFT)
+#define PORT1_REMOTE_ATTEN_SMASK	(PORT1_REMOTE_ATTEN_MASK << \
+					 PORT1_REMOTE_ATTEN_SHIFT)
+#define PORT1_DEFAULT_ATTEN_SMASK	(PORT1_DEFAULT_ATTEN_MASK << \
+					 PORT1_DEFAULT_ATTEN_SHIFT)
+
+#define QSFP_MAX_POWER_SHIFT		0
+#define TX_NO_EQ_SHIFT			4
+#define TX_EQ_SHIFT			25
+#define RX_SHIFT			46
+
+#define QSFP_MAX_POWER_MASK		0xFUL
+#define TX_NO_EQ_MASK			0x1FFFFFUL
+#define TX_EQ_MASK			0x1FFFFFUL
+#define RX_MASK				0xFFFFUL
+
+#define QSFP_MAX_POWER_SMASK		(QSFP_MAX_POWER_MASK << \
+					 QSFP_MAX_POWER_SHIFT)
+#define TX_NO_EQ_SMASK			(TX_NO_EQ_MASK << TX_NO_EQ_SHIFT)
+#define TX_EQ_SMASK			(TX_EQ_MASK << TX_EQ_SHIFT)
+#define RX_SMASK			(RX_MASK << RX_SHIFT)
+
+#define TX_PRECUR_SHIFT			0
+#define TX_ATTN_SHIFT			4
+#define QSFP_TX_CDR_APPLY_SHIFT		9
+#define QSFP_TX_EQ_APPLY_SHIFT		10
+#define QSFP_TX_CDR_SHIFT		11
+#define QSFP_TX_EQ_SHIFT		12
+#define TX_POSTCUR_SHIFT		16
+
+#define TX_PRECUR_MASK			0xFUL
+#define TX_ATTN_MASK			0x1FUL
+#define QSFP_TX_CDR_APPLY_MASK		0x1UL
+#define QSFP_TX_EQ_APPLY_MASK		0x1UL
+#define QSFP_TX_CDR_MASK		0x1UL
+#define QSFP_TX_EQ_MASK			0xFUL
+#define TX_POSTCUR_MASK			0x1FUL
+
+#define TX_PRECUR_SMASK			(TX_PRECUR_MASK << TX_PRECUR_SHIFT)
+#define TX_ATTN_SMASK			(TX_ATTN_MASK << TX_ATTN_SHIFT)
+#define QSFP_TX_CDR_APPLY_SMASK		(QSFP_TX_CDR_APPLY_MASK << \
+					 QSFP_TX_CDR_APPLY_SHIFT)
+#define QSFP_TX_EQ_APPLY_SMASK		(QSFP_TX_EQ_APPLY_MASK << \
+					 QSFP_TX_EQ_APPLY_SHIFT)
+#define QSFP_TX_CDR_SMASK		(QSFP_TX_CDR_MASK << QSFP_TX_CDR_SHIFT)
+#define QSFP_TX_EQ_SMASK		(QSFP_TX_EQ_MASK << QSFP_TX_EQ_SHIFT)
+#define TX_POSTCUR_SMASK		(TX_POSTCUR_MASK << TX_POSTCUR_SHIFT)
+
+#define QSFP_RX_CDR_APPLY_SHIFT		0
+#define QSFP_RX_EMP_APPLY_SHIFT		1
+#define QSFP_RX_AMP_APPLY_SHIFT		2
+#define QSFP_RX_CDR_SHIFT		3
+#define QSFP_RX_EMP_SHIFT		4
+#define QSFP_RX_AMP_SHIFT		8
+
+#define QSFP_RX_CDR_APPLY_MASK		0x1UL
+#define QSFP_RX_EMP_APPLY_MASK		0x1UL
+#define QSFP_RX_AMP_APPLY_MASK		0x1UL
+#define QSFP_RX_CDR_MASK		0x1UL
+#define QSFP_RX_EMP_MASK		0xFUL
+#define QSFP_RX_AMP_MASK		0x3UL
+
+#define QSFP_RX_CDR_APPLY_SMASK		(QSFP_RX_CDR_APPLY_MASK << \
+					 QSFP_RX_CDR_APPLY_SHIFT)
+#define QSFP_RX_EMP_APPLY_SMASK		(QSFP_RX_EMP_APPLY_MASK << \
+					 QSFP_RX_EMP_APPLY_SHIFT)
+#define QSFP_RX_AMP_APPLY_SMASK		(QSFP_RX_AMP_APPLY_MASK << \
+					 QSFP_RX_AMP_APPLY_SHIFT)
+#define QSFP_RX_CDR_SMASK		(QSFP_RX_CDR_MASK << QSFP_RX_CDR_SHIFT)
+#define QSFP_RX_EMP_SMASK		(QSFP_RX_EMP_MASK << QSFP_RX_EMP_SHIFT)
+#define QSFP_RX_AMP_SMASK		(QSFP_RX_AMP_MASK << QSFP_RX_AMP_SHIFT)
+
+#define BITMAP_VERSION			1
+#define BITMAP_VERSION_SHIFT		44
+#define BITMAP_VERSION_MASK		0xFUL
+#define BITMAP_VERSION_SMASK		(BITMAP_VERSION_MASK << \
+					 BITMAP_VERSION_SHIFT)
+#define CHECKSUM_SHIFT			48
+#define CHECKSUM_MASK			0xFFFFUL
+#define CHECKSUM_SMASK			(CHECKSUM_MASK << CHECKSUM_SHIFT)
+
 /* platform.c */
 void get_platform_config(struct hfi1_devdata *dd);
 void free_platform_config(struct hfi1_devdata *dd);
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 9fc75e7..d752d67 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -196,15 +196,18 @@
 static void flush_iowait(struct rvt_qp *qp)
 {
 	struct hfi1_qp_priv *priv = qp->priv;
-	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
 	unsigned long flags;
+	seqlock_t *lock = priv->s_iowait.lock;
 
-	write_seqlock_irqsave(&dev->iowait_lock, flags);
+	if (!lock)
+		return;
+	write_seqlock_irqsave(lock, flags);
 	if (!list_empty(&priv->s_iowait.list)) {
 		list_del_init(&priv->s_iowait.list);
+		priv->s_iowait.lock = NULL;
 		rvt_put_qp(qp);
 	}
-	write_sequnlock_irqrestore(&dev->iowait_lock, flags);
+	write_sequnlock_irqrestore(lock, flags);
 }
 
 static inline int opa_mtu_enum_to_int(int mtu)
@@ -543,6 +546,7 @@
 			ibp->rvp.n_dmawait++;
 			qp->s_flags |= RVT_S_WAIT_DMA_DESC;
 			list_add_tail(&priv->s_iowait.list, &sde->dmawait);
+			priv->s_iowait.lock = &dev->iowait_lock;
 			trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
 			rvt_get_qp(qp);
 		}
@@ -964,6 +968,7 @@
 	if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) {
 		qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
 		list_del_init(&priv->s_iowait.list);
+		priv->s_iowait.lock = NULL;
 		rvt_put_qp(qp);
 	}
 	write_sequnlock(&dev->iowait_lock);
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 8bc5013..9db260f 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -276,7 +276,7 @@
 				rvt_get_mr(ps->s_txreq->mr);
 			qp->s_ack_rdma_sge.sge = e->rdma_sge;
 			qp->s_ack_rdma_sge.num_sge = 1;
-			qp->s_cur_sge = &qp->s_ack_rdma_sge;
+			ps->s_txreq->ss = &qp->s_ack_rdma_sge;
 			if (len > pmtu) {
 				len = pmtu;
 				qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
@@ -290,7 +290,7 @@
 			bth2 = mask_psn(qp->s_ack_rdma_psn++);
 		} else {
 			/* COMPARE_SWAP or FETCH_ADD */
-			qp->s_cur_sge = NULL;
+			ps->s_txreq->ss = NULL;
 			len = 0;
 			qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
 			ohdr->u.at.aeth = hfi1_compute_aeth(qp);
@@ -306,7 +306,7 @@
 		qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
 		/* FALLTHROUGH */
 	case OP(RDMA_READ_RESPONSE_MIDDLE):
-		qp->s_cur_sge = &qp->s_ack_rdma_sge;
+		ps->s_txreq->ss = &qp->s_ack_rdma_sge;
 		ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
 		if (ps->s_txreq->mr)
 			rvt_get_mr(ps->s_txreq->mr);
@@ -335,7 +335,7 @@
 		 */
 		qp->s_ack_state = OP(SEND_ONLY);
 		qp->s_flags &= ~RVT_S_ACK_PENDING;
-		qp->s_cur_sge = NULL;
+		ps->s_txreq->ss = NULL;
 		if (qp->s_nak_state)
 			ohdr->u.aeth =
 				cpu_to_be32((qp->r_msn & HFI1_MSN_MASK) |
@@ -351,7 +351,7 @@
 	qp->s_rdma_ack_cnt++;
 	qp->s_hdrwords = hwords;
 	ps->s_txreq->sde = priv->s_sde;
-	qp->s_cur_size = len;
+	ps->s_txreq->s_cur_size = len;
 	hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps);
 	/* pbc */
 	ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
@@ -801,8 +801,8 @@
 	qp->s_len -= len;
 	qp->s_hdrwords = hwords;
 	ps->s_txreq->sde = priv->s_sde;
-	qp->s_cur_sge = ss;
-	qp->s_cur_size = len;
+	ps->s_txreq->ss = ss;
+	ps->s_txreq->s_cur_size = len;
 	hfi1_make_ruc_header(
 		qp,
 		ohdr,
@@ -1146,8 +1146,6 @@
 {
 	struct ib_other_headers *ohdr;
 	struct rvt_swqe *wqe;
-	struct ib_wc wc;
-	unsigned i;
 	u32 opcode;
 	u32 psn;
 
@@ -1195,22 +1193,8 @@
 		qp->s_last = s_last;
 		/* see post_send() */
 		barrier();
-		for (i = 0; i < wqe->wr.num_sge; i++) {
-			struct rvt_sge *sge = &wqe->sg_list[i];
-
-			rvt_put_mr(sge->mr);
-		}
-		/* Post a send completion queue entry if requested. */
-		if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
-		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
-			memset(&wc, 0, sizeof(wc));
-			wc.wr_id = wqe->wr.wr_id;
-			wc.status = IB_WC_SUCCESS;
-			wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
-			wc.byte_len = wqe->length;
-			wc.qp = &qp->ibqp;
-			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
-		}
+		rvt_put_swqe(wqe);
+		rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
 	}
 	/*
 	 * If we were waiting for sends to complete before re-sending,
@@ -1240,9 +1224,6 @@
 					 struct rvt_swqe *wqe,
 					 struct hfi1_ibport *ibp)
 {
-	struct ib_wc wc;
-	unsigned i;
-
 	lockdep_assert_held(&qp->s_lock);
 	/*
 	 * Don't decrement refcount and don't generate a
@@ -1253,28 +1234,14 @@
 	    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
 		u32 s_last;
 
-		for (i = 0; i < wqe->wr.num_sge; i++) {
-			struct rvt_sge *sge = &wqe->sg_list[i];
-
-			rvt_put_mr(sge->mr);
-		}
+		rvt_put_swqe(wqe);
 		s_last = qp->s_last;
 		if (++s_last >= qp->s_size)
 			s_last = 0;
 		qp->s_last = s_last;
 		/* see post_send() */
 		barrier();
-		/* Post a send completion queue entry if requested. */
-		if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
-		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
-			memset(&wc, 0, sizeof(wc));
-			wc.wr_id = wqe->wr.wr_id;
-			wc.status = IB_WC_SUCCESS;
-			wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
-			wc.byte_len = wqe->length;
-			wc.qp = &qp->ibqp;
-			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
-		}
+		rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
 	} else {
 		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 
@@ -2295,7 +2262,7 @@
 		hfi1_copy_sge(&qp->r_sge, data, tlen, 1, copy_last);
 		rvt_put_ss(&qp->r_sge);
 		qp->r_msn++;
-		if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
+		if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 			break;
 		wc.wr_id = qp->r_wr_id;
 		wc.status = IB_WC_SUCCESS;
@@ -2410,8 +2377,7 @@
 			 * Update the next expected PSN.  We add 1 later
 			 * below, so only add the remainder here.
 			 */
-			if (len > pmtu)
-				qp->r_psn += (len - 1) / pmtu;
+			qp->r_psn += rvt_div_mtu(qp, len - 1);
 		} else {
 			e->rdma_sge.mr = NULL;
 			e->rdma_sge.vaddr = NULL;
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index a1576ae..717ed4b15 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -239,16 +239,6 @@
 	return ret;
 }
 
-static __be64 get_sguid(struct hfi1_ibport *ibp, unsigned index)
-{
-	if (!index) {
-		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-
-		return cpu_to_be64(ppd->guid);
-	}
-	return ibp->guids[index - 1];
-}
-
 static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
 {
 	return (gid->global.interface_id == id &&
@@ -699,9 +689,9 @@
 	/* The SGID is 32-bit aligned. */
 	hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
 	hdr->sgid.global.interface_id =
-		grh->sgid_index && grh->sgid_index < ARRAY_SIZE(ibp->guids) ?
-		ibp->guids[grh->sgid_index - 1] :
-			cpu_to_be64(ppd_from_ibp(ibp)->guid);
+		grh->sgid_index < HFI1_GUIDS_PER_PORT ?
+		get_sguid(ibp, grh->sgid_index) :
+		get_sguid(ibp, HFI1_PORT_GUID_INDEX);
 	hdr->dgid = grh->dgid;
 
 	/* GRH header size in 32-bit words. */
@@ -777,8 +767,8 @@
 	u32 bth1;
 
 	/* Construct the header. */
-	extra_bytes = -qp->s_cur_size & 3;
-	nwords = (qp->s_cur_size + extra_bytes) >> 2;
+	extra_bytes = -ps->s_txreq->s_cur_size & 3;
+	nwords = (ps->s_txreq->s_cur_size + extra_bytes) >> 2;
 	lrh0 = HFI1_LRH_BTH;
 	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
 		qp->s_hdrwords += hfi1_make_grh(ibp,
@@ -952,7 +942,6 @@
 			enum ib_wc_status status)
 {
 	u32 old_last, last;
-	unsigned i;
 
 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		return;
@@ -964,32 +953,13 @@
 	qp->s_last = last;
 	/* See post_send() */
 	barrier();
-	for (i = 0; i < wqe->wr.num_sge; i++) {
-		struct rvt_sge *sge = &wqe->sg_list[i];
-
-		rvt_put_mr(sge->mr);
-	}
+	rvt_put_swqe(wqe);
 	if (qp->ibqp.qp_type == IB_QPT_UD ||
 	    qp->ibqp.qp_type == IB_QPT_SMI ||
 	    qp->ibqp.qp_type == IB_QPT_GSI)
 		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
 
-	/* See ch. 11.2.4.1 and 10.7.3.1 */
-	if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
-	    (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
-	    status != IB_WC_SUCCESS) {
-		struct ib_wc wc;
-
-		memset(&wc, 0, sizeof(wc));
-		wc.wr_id = wqe->wr.wr_id;
-		wc.status = status;
-		wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
-		wc.qp = &qp->ibqp;
-		if (status == IB_WC_SUCCESS)
-			wc.byte_len = wqe->length;
-		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
-			     status != IB_WC_SUCCESS);
-	}
+	rvt_qp_swqe_complete(qp, wqe, status);
 
 	if (qp->s_acked == old_last)
 		qp->s_acked = last;
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index fd39bca..7102a07 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -375,7 +375,7 @@
 			   sde->head_sn, tx->sn);
 	sde->head_sn++;
 #endif
-	sdma_txclean(sde->dd, tx);
+	__sdma_txclean(sde->dd, tx);
 	if (complete)
 		(*complete)(tx, res);
 	if (wait && iowait_sdma_dec(wait))
@@ -1643,7 +1643,7 @@
 }
 
 /**
- * sdma_txclean() - clean tx of mappings, descp *kmalloc's
+ * __sdma_txclean() - clean tx of mappings, descp *kmalloc's
  * @dd: hfi1_devdata for unmapping
  * @tx: tx request to clean
  *
@@ -1653,7 +1653,7 @@
  * The code can be called multiple times without issue.
  *
  */
-void sdma_txclean(
+void __sdma_txclean(
 	struct hfi1_devdata *dd,
 	struct sdma_txreq *tx)
 {
@@ -3080,7 +3080,7 @@
 		tx->descp[i] = tx->descs[i];
 	return 0;
 enomem:
-	sdma_txclean(dd, tx);
+	__sdma_txclean(dd, tx);
 	return -ENOMEM;
 }
 
@@ -3109,14 +3109,14 @@
 
 	rval = _extend_sdma_tx_descs(dd, tx);
 	if (rval) {
-		sdma_txclean(dd, tx);
+		__sdma_txclean(dd, tx);
 		return rval;
 	}
 
 	/* If coalesce buffer is allocated, copy data into it */
 	if (tx->coalesce_buf) {
 		if (type == SDMA_MAP_NONE) {
-			sdma_txclean(dd, tx);
+			__sdma_txclean(dd, tx);
 			return -EINVAL;
 		}
 
@@ -3124,7 +3124,7 @@
 			kvaddr = kmap(page);
 			kvaddr += offset;
 		} else if (WARN_ON(!kvaddr)) {
-			sdma_txclean(dd, tx);
+			__sdma_txclean(dd, tx);
 			return -EINVAL;
 		}
 
@@ -3154,7 +3154,7 @@
 				      DMA_TO_DEVICE);
 
 		if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
-			sdma_txclean(dd, tx);
+			__sdma_txclean(dd, tx);
 			return -ENOSPC;
 		}
 
@@ -3196,7 +3196,7 @@
 	if ((unlikely(tx->num_desc == tx->desc_limit))) {
 		rval = _extend_sdma_tx_descs(dd, tx);
 		if (rval) {
-			sdma_txclean(dd, tx);
+			__sdma_txclean(dd, tx);
 			return rval;
 		}
 	}
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 56257ea..21f1e28 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -667,7 +667,13 @@
 			   int type, void *kvaddr, struct page *page,
 			   unsigned long offset, u16 len);
 int _pad_sdma_tx_descs(struct hfi1_devdata *, struct sdma_txreq *);
-void sdma_txclean(struct hfi1_devdata *, struct sdma_txreq *);
+void __sdma_txclean(struct hfi1_devdata *, struct sdma_txreq *);
+
+static inline void sdma_txclean(struct hfi1_devdata *dd, struct sdma_txreq *tx)
+{
+	if (tx->num_desc)
+		__sdma_txclean(dd, tx);
+}
 
 /* helpers used by public routines */
 static inline void _sdma_close_tx(struct hfi1_devdata *dd,
@@ -753,7 +759,7 @@
 		       DMA_TO_DEVICE);
 
 	if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
-		sdma_txclean(dd, tx);
+		__sdma_txclean(dd, tx);
 		return -ENOSPC;
 	}
 
@@ -834,7 +840,7 @@
 		       DMA_TO_DEVICE);
 
 	if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
-		sdma_txclean(dd, tx);
+		__sdma_txclean(dd, tx);
 		return -ENOSPC;
 	}
 
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 5e6d1ba..b141a78 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -258,8 +258,8 @@
 	qp->s_len -= len;
 	qp->s_hdrwords = hwords;
 	ps->s_txreq->sde = priv->s_sde;
-	qp->s_cur_sge = &qp->s_sge;
-	qp->s_cur_size = len;
+	ps->s_txreq->ss = &qp->s_sge;
+	ps->s_txreq->s_cur_size = len;
 	hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
 			     mask_psn(qp->s_psn++), middle, ps);
 	/* pbc */
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 97ae24b..c071955 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -354,8 +354,8 @@
 
 	/* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
 	qp->s_hdrwords = 7;
-	qp->s_cur_size = wqe->length;
-	qp->s_cur_sge = &qp->s_sge;
+	ps->s_txreq->s_cur_size = wqe->length;
+	ps->s_txreq->ss = &qp->s_sge;
 	qp->s_srate = ah_attr->static_rate;
 	qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
 	qp->s_wqe = wqe;
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index a761f80..663980e 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -115,6 +115,7 @@
 #define KDETH_HCRC_LOWER_MASK     0xff
 
 #define AHG_KDETH_INTR_SHIFT 12
+#define AHG_KDETH_SH_SHIFT   13
 
 #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4)
 #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff)
@@ -144,8 +145,9 @@
 #define KDETH_OM_LARGE     64
 #define KDETH_OM_MAX_SIZE  (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
 
-/* Last packet in the request */
-#define TXREQ_FLAGS_REQ_LAST_PKT BIT(0)
+/* Tx request flag bits */
+#define TXREQ_FLAGS_REQ_ACK   BIT(0)      /* Set the ACK bit in the header */
+#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */
 
 /* SDMA request flag bits */
 #define SDMA_REQ_FOR_THREAD 1
@@ -943,8 +945,13 @@
 		tx->busycount = 0;
 		INIT_LIST_HEAD(&tx->list);
 
+		/*
+		 * For the last packet set the ACK request
+		 * and disable header suppression.
+		 */
 		if (req->seqnum == req->info.npkts - 1)
-			tx->flags |= TXREQ_FLAGS_REQ_LAST_PKT;
+			tx->flags |= (TXREQ_FLAGS_REQ_ACK |
+				      TXREQ_FLAGS_REQ_DISABLE_SH);
 
 		/*
 		 * Calculate the payload size - this is min of the fragment
@@ -963,11 +970,22 @@
 			}
 
 			datalen = compute_data_length(req, tx);
+
+			/*
+			 * Disable header suppression for the payload <= 8DWS.
+			 * If there is an uncorrectable error in the receive
+			 * data FIFO when the received payload size is less than
+			 * or equal to 8DWS then the RxDmaDataFifoRdUncErr is
+			 * not reported. RHF.EccErr is set if the header
+			 * is not suppressed.
+			 */
 			if (!datalen) {
 				SDMA_DBG(req,
 					 "Request has data but pkt len is 0");
 				ret = -EFAULT;
 				goto free_tx;
+			} else if (datalen <= 32) {
+				tx->flags |= TXREQ_FLAGS_REQ_DISABLE_SH;
 			}
 		}
 
@@ -990,6 +1008,10 @@
 						LRH2PBC(lrhlen);
 					tx->hdr.pbc[0] = cpu_to_le16(pbclen);
 				}
+				ret = check_header_template(req, &tx->hdr,
+							    lrhlen, datalen);
+				if (ret)
+					goto free_tx;
 				ret = sdma_txinit_ahg(&tx->txreq,
 						      SDMA_TXREQ_F_AHG_COPY,
 						      sizeof(tx->hdr) + datalen,
@@ -1351,7 +1373,7 @@
 				req->seqnum));
 
 	/* Set ACK request on last packet */
-	if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT))
+	if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
 		hdr->bth[2] |= cpu_to_be32(1UL << 31);
 
 	/* Set the new offset */
@@ -1384,8 +1406,8 @@
 		/* Set KDETH.TID based on value for this TID */
 		KDETH_SET(hdr->kdeth.ver_tid_offset, TID,
 			  EXP_TID_GET(tidval, IDX));
-		/* Clear KDETH.SH only on the last packet */
-		if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT))
+		/* Clear KDETH.SH when DISABLE_SH flag is set */
+		if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH))
 			KDETH_SET(hdr->kdeth.ver_tid_offset, SH, 0);
 		/*
 		 * Set the KDETH.OFFSET and KDETH.OM based on size of
@@ -1429,7 +1451,7 @@
 	/* BTH.PSN and BTH.A */
 	val32 = (be32_to_cpu(hdr->bth[2]) + req->seqnum) &
 		(HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff);
-	if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT))
+	if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
 		val32 |= 1UL << 31;
 	AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16));
 	AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff));
@@ -1468,19 +1490,23 @@
 		AHG_HEADER_SET(req->ahg, diff, 7, 0, 16,
 			       ((!!(req->omfactor - KDETH_OM_SMALL)) << 15 |
 				((req->tidoffset / req->omfactor) & 0x7fff)));
-		/* KDETH.TIDCtrl, KDETH.TID */
+		/* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */
 		val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) |
-					(EXP_TID_GET(tidval, IDX) & 0x3ff));
-		/* Clear KDETH.SH on last packet */
-		if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) {
-			val |= cpu_to_le16(KDETH_GET(hdr->kdeth.ver_tid_offset,
-						     INTR) <<
-					   AHG_KDETH_INTR_SHIFT);
-			val &= cpu_to_le16(~(1U << 13));
-			AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
+				   (EXP_TID_GET(tidval, IDX) & 0x3ff));
+
+		if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) {
+			val |= cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset,
+						      INTR) <<
+					    AHG_KDETH_INTR_SHIFT));
 		} else {
-			AHG_HEADER_SET(req->ahg, diff, 7, 16, 12, val);
+			val |= KDETH_GET(hdr->kdeth.ver_tid_offset, SH) ?
+			       cpu_to_le16(0x1 << AHG_KDETH_SH_SHIFT) :
+			       cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset,
+						      INTR) <<
+					     AHG_KDETH_INTR_SHIFT));
 		}
+
+		AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
 	}
 
 	trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 4b7a16c..95ed4d6 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -297,22 +297,6 @@
 }
 
 /*
- * Translate ib_wr_opcode into ib_wc_opcode.
- */
-const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
-	[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
-	[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
-	[IB_WR_SEND] = IB_WC_SEND,
-	[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
-	[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
-	[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
-	[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
-	[IB_WR_SEND_WITH_INV] = IB_WC_SEND,
-	[IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
-	[IB_WR_REG_MR] = IB_WC_REG_MR
-};
-
-/*
  * Length of header by opcode, 0 --> not supported
  */
 const u8 hdr_len_by_opcode[256] = {
@@ -694,6 +678,7 @@
 		qp = iowait_to_qp(wait);
 		priv = qp->priv;
 		list_del_init(&priv->s_iowait.list);
+		priv->s_iowait.lock = NULL;
 		/* refcount held until actual wake up */
 		if (!list_empty(list))
 			mod_timer(&dev->mem_timer, jiffies + 1);
@@ -769,6 +754,7 @@
 				mod_timer(&dev->mem_timer, jiffies + 1);
 			qp->s_flags |= RVT_S_WAIT_KMEM;
 			list_add_tail(&priv->s_iowait.list, &dev->memwait);
+			priv->s_iowait.lock = &dev->iowait_lock;
 			trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
 			rvt_get_qp(qp);
 		}
@@ -788,10 +774,10 @@
  */
 static noinline int build_verbs_ulp_payload(
 	struct sdma_engine *sde,
-	struct rvt_sge_state *ss,
 	u32 length,
 	struct verbs_txreq *tx)
 {
+	struct rvt_sge_state *ss = tx->ss;
 	struct rvt_sge *sg_list = ss->sg_list;
 	struct rvt_sge sge = ss->sge;
 	u8 num_sge = ss->num_sge;
@@ -835,7 +821,6 @@
 /* New API */
 static int build_verbs_tx_desc(
 	struct sdma_engine *sde,
-	struct rvt_sge_state *ss,
 	u32 length,
 	struct verbs_txreq *tx,
 	struct hfi1_ahg_info *ahg_info,
@@ -879,9 +864,9 @@
 			goto bail_txadd;
 	}
 
-	/* add the ulp payload - if any.  ss can be NULL for acks */
-	if (ss)
-		ret = build_verbs_ulp_payload(sde, ss, length, tx);
+	/* add the ulp payload - if any. tx->ss can be NULL for acks */
+	if (tx->ss)
+		ret = build_verbs_ulp_payload(sde, length, tx);
 bail_txadd:
 	return ret;
 }
@@ -892,8 +877,7 @@
 	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_ahg_info *ahg_info = priv->s_ahg;
 	u32 hdrwords = qp->s_hdrwords;
-	struct rvt_sge_state *ss = qp->s_cur_sge;
-	u32 len = qp->s_cur_size;
+	u32 len = ps->s_txreq->s_cur_size;
 	u32 plen = hdrwords + ((len + 3) >> 2) + 2; /* includes pbc */
 	struct hfi1_ibdev *dev = ps->dev;
 	struct hfi1_pportdata *ppd = ps->ppd;
@@ -918,7 +902,7 @@
 					 plen);
 		}
 		tx->wqe = qp->s_wqe;
-		ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahg_info, pbc);
+		ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc);
 		if (unlikely(ret))
 			goto bail_build;
 	}
@@ -980,6 +964,7 @@
 			qp->s_flags |= flag;
 			was_empty = list_empty(&sc->piowait);
 			list_add_tail(&priv->s_iowait.list, &sc->piowait);
+			priv->s_iowait.lock = &dev->iowait_lock;
 			trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
 			rvt_get_qp(qp);
 			/* counting: only call wantpiobuf_intr if first user */
@@ -1008,8 +993,8 @@
 {
 	struct hfi1_qp_priv *priv = qp->priv;
 	u32 hdrwords = qp->s_hdrwords;
-	struct rvt_sge_state *ss = qp->s_cur_sge;
-	u32 len = qp->s_cur_size;
+	struct rvt_sge_state *ss = ps->s_txreq->ss;
+	u32 len = ps->s_txreq->s_cur_size;
 	u32 dwords = (len + 3) >> 2;
 	u32 plen = hdrwords + dwords + 2; /* includes pbc */
 	struct hfi1_pportdata *ppd = ps->ppd;
@@ -1237,7 +1222,7 @@
 		u8 op = get_opcode(h);
 
 		if (piothreshold &&
-		    qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
+		    tx->s_cur_size <= min(piothreshold, qp->pmtu) &&
 		    (BIT(op & OPMASK) & pio_opmask[op >> 5]) &&
 		    iowait_sdma_pending(&priv->s_iowait) == 0 &&
 		    !sdma_txreq_built(&tx->txreq))
@@ -1483,15 +1468,11 @@
 			    int guid_index, __be64 *guid)
 {
 	struct hfi1_ibport *ibp = container_of(rvp, struct hfi1_ibport, rvp);
-	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 
-	if (guid_index == 0)
-		*guid = cpu_to_be64(ppd->guid);
-	else if (guid_index < HFI1_GUIDS_PER_PORT)
-		*guid = ibp->guids[guid_index - 1];
-	else
+	if (guid_index >= HFI1_GUIDS_PER_PORT)
 		return -EINVAL;
 
+	*guid = get_sguid(ibp, guid_index);
 	return 0;
 }
 
@@ -1610,6 +1591,154 @@
 		 dc8051_ver_min(ver));
 }
 
+static const char * const driver_cntr_names[] = {
+	/* must be element 0*/
+	"DRIVER_KernIntr",
+	"DRIVER_ErrorIntr",
+	"DRIVER_Tx_Errs",
+	"DRIVER_Rcv_Errs",
+	"DRIVER_HW_Errs",
+	"DRIVER_NoPIOBufs",
+	"DRIVER_CtxtsOpen",
+	"DRIVER_RcvLen_Errs",
+	"DRIVER_EgrBufFull",
+	"DRIVER_EgrHdrFull"
+};
+
+static const char **dev_cntr_names;
+static const char **port_cntr_names;
+static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names);
+static int num_dev_cntrs;
+static int num_port_cntrs;
+static int cntr_names_initialized;
+
+/*
+ * Convert a list of names separated by '\n' into an array of NUL-terminated
+ * strings. Optionally some entries can be reserved in the array to hold extra
+ * external strings.
+ */
+static int init_cntr_names(const char *names_in,
+			   const int names_len,
+			   int num_extra_names,
+			   int *num_cntrs,
+			   const char ***cntr_names)
+{
+	char *names_out, *p, **q;
+	int i, n;
+
+	n = 0;
+	for (i = 0; i < names_len; i++)
+		if (names_in[i] == '\n')
+			n++;
+
+	names_out = kmalloc((n + num_extra_names) * sizeof(char *) + names_len,
+			    GFP_KERNEL);
+	if (!names_out) {
+		*num_cntrs = 0;
+		*cntr_names = NULL;
+		return -ENOMEM;
+	}
+
+	p = names_out + (n + num_extra_names) * sizeof(char *);
+	memcpy(p, names_in, names_len);
+
+	q = (char **)names_out;
+	for (i = 0; i < n; i++) {
+		q[i] = p;
+		p = strchr(p, '\n');
+		*p++ = '\0';
+	}
+
+	*num_cntrs = n;
+	*cntr_names = (const char **)names_out;
+	return 0;
+}
+
+static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
+					    u8 port_num)
+{
+	int i, err;
+
+	if (!cntr_names_initialized) {
+		struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+
+		err = init_cntr_names(dd->cntrnames,
+				      dd->cntrnameslen,
+				      num_driver_cntrs,
+				      &num_dev_cntrs,
+				      &dev_cntr_names);
+		if (err)
+			return NULL;
+
+		for (i = 0; i < num_driver_cntrs; i++)
+			dev_cntr_names[num_dev_cntrs + i] =
+				driver_cntr_names[i];
+
+		err = init_cntr_names(dd->portcntrnames,
+				      dd->portcntrnameslen,
+				      0,
+				      &num_port_cntrs,
+				      &port_cntr_names);
+		if (err) {
+			kfree(dev_cntr_names);
+			dev_cntr_names = NULL;
+			return NULL;
+		}
+		cntr_names_initialized = 1;
+	}
+
+	if (!port_num)
+		return rdma_alloc_hw_stats_struct(
+				dev_cntr_names,
+				num_dev_cntrs + num_driver_cntrs,
+				RDMA_HW_STATS_DEFAULT_LIFESPAN);
+	else
+		return rdma_alloc_hw_stats_struct(
+				port_cntr_names,
+				num_port_cntrs,
+				RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static u64 hfi1_sps_ints(void)
+{
+	unsigned long flags;
+	struct hfi1_devdata *dd;
+	u64 sps_ints = 0;
+
+	spin_lock_irqsave(&hfi1_devs_lock, flags);
+	list_for_each_entry(dd, &hfi1_dev_list, list) {
+		sps_ints += get_all_cpu_total(dd->int_counter);
+	}
+	spin_unlock_irqrestore(&hfi1_devs_lock, flags);
+	return sps_ints;
+}
+
+static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+			u8 port, int index)
+{
+	u64 *values;
+	int count;
+
+	if (!port) {
+		u64 *stats = (u64 *)&hfi1_stats;
+		int i;
+
+		hfi1_read_cntrs(dd_from_ibdev(ibdev), NULL, &values);
+		values[num_dev_cntrs] = hfi1_sps_ints();
+		for (i = 1; i < num_driver_cntrs; i++)
+			values[num_dev_cntrs + i] = stats[i];
+		count = num_dev_cntrs + num_driver_cntrs;
+	} else {
+		struct hfi1_ibport *ibp = to_iport(ibdev, port);
+
+		hfi1_read_portcntrs(ppd_from_ibp(ibp), NULL, &values);
+		count = num_port_cntrs;
+	}
+
+	memcpy(stats->value, values, count * sizeof(u64));
+	return count;
+}
+
 /**
  * hfi1_register_ib_device - register our device with the infiniband core
  * @dd: the device data structure
@@ -1620,6 +1749,7 @@
 	struct hfi1_ibdev *dev = &dd->verbs_dev;
 	struct ib_device *ibdev = &dev->rdi.ibdev;
 	struct hfi1_pportdata *ppd = dd->pport;
+	struct hfi1_ibport *ibp = &ppd->ibport_data;
 	unsigned i;
 	int ret;
 	size_t lcpysz = IB_DEVICE_NAME_MAX;
@@ -1632,6 +1762,7 @@
 	setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);
 
 	seqlock_init(&dev->iowait_lock);
+	seqlock_init(&dev->txwait_lock);
 	INIT_LIST_HEAD(&dev->txwait);
 	INIT_LIST_HEAD(&dev->memwait);
 
@@ -1639,20 +1770,24 @@
 	if (ret)
 		goto err_verbs_txreq;
 
+	/* Use first-port GUID as node guid */
+	ibdev->node_guid = get_sguid(ibp, HFI1_PORT_GUID_INDEX);
+
 	/*
 	 * The system image GUID is supposed to be the same for all
 	 * HFIs in a single system but since there can be other
 	 * device types in the system, we can't be sure this is unique.
 	 */
 	if (!ib_hfi1_sys_image_guid)
-		ib_hfi1_sys_image_guid = cpu_to_be64(ppd->guid);
+		ib_hfi1_sys_image_guid = ibdev->node_guid;
 	lcpysz = strlcpy(ibdev->name, class_name(), lcpysz);
 	strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz);
 	ibdev->owner = THIS_MODULE;
-	ibdev->node_guid = cpu_to_be64(ppd->guid);
 	ibdev->phys_port_cnt = dd->num_pports;
 	ibdev->dma_device = &dd->pcidev->dev;
 	ibdev->modify_device = modify_device;
+	ibdev->alloc_hw_stats = alloc_hw_stats;
+	ibdev->get_hw_stats = get_hw_stats;
 
 	/* keep process mad in the driver */
 	ibdev->process_mad = hfi1_process_mad;
@@ -1767,6 +1902,10 @@
 
 	del_timer_sync(&dev->mem_timer);
 	verbs_txreq_exit(dev);
+
+	kfree(dev_cntr_names);
+	kfree(port_cntr_names);
+	cntr_names_initialized = 0;
 }
 
 void hfi1_cnp_rcv(struct hfi1_packet *packet)
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 1c3815d..e6b8930 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -73,7 +73,6 @@
 #include "iowait.h"
 
 #define HFI1_MAX_RDMA_ATOMIC     16
-#define HFI1_GUIDS_PER_PORT	5
 
 /*
  * Increment this value if any changes that break userspace ABI
@@ -169,8 +168,6 @@
 	struct rvt_qp __rcu *qp[2];
 	struct rvt_ibport rvp;
 
-	__be64 guids[HFI1_GUIDS_PER_PORT	- 1];	/* writable GUIDs */
-
 	/* the first 16 entries are sl_to_vl for !OPA */
 	u8 sl_to_sc[32];
 	u8 sc_to_sl[32];
@@ -180,19 +177,20 @@
 	struct rvt_dev_info rdi; /* Must be first */
 
 	/* QP numbers are shared by all IB ports */
-	/* protect wait lists */
-	seqlock_t iowait_lock;
+	/* protect txwait list */
+	seqlock_t txwait_lock ____cacheline_aligned_in_smp;
 	struct list_head txwait;        /* list for wait verbs_txreq */
 	struct list_head memwait;       /* list for wait kernel memory */
-	struct list_head txreq_free;
 	struct kmem_cache *verbs_txreq_cache;
-	struct timer_list mem_timer;
-
-	u64 n_piowait;
-	u64 n_piodrain;
 	u64 n_txwait;
 	u64 n_kmem_wait;
 
+	/* protect iowait lists */
+	seqlock_t iowait_lock ____cacheline_aligned_in_smp;
+	u64 n_piowait;
+	u64 n_piodrain;
+	struct timer_list mem_timer;
+
 #ifdef CONFIG_DEBUG_FS
 	/* per HFI debugfs */
 	struct dentry *hfi1_ibdev_dbg;
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c
index 094ab82..5d23172 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.c
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c
@@ -72,22 +72,22 @@
 	kmem_cache_free(dev->verbs_txreq_cache, tx);
 
 	do {
-		seq = read_seqbegin(&dev->iowait_lock);
+		seq = read_seqbegin(&dev->txwait_lock);
 		if (!list_empty(&dev->txwait)) {
 			struct iowait *wait;
 
-			write_seqlock_irqsave(&dev->iowait_lock, flags);
+			write_seqlock_irqsave(&dev->txwait_lock, flags);
 			wait = list_first_entry(&dev->txwait, struct iowait,
 						list);
 			qp = iowait_to_qp(wait);
 			priv = qp->priv;
 			list_del_init(&priv->s_iowait.list);
 			/* refcount held until actual wake up */
-			write_sequnlock_irqrestore(&dev->iowait_lock, flags);
+			write_sequnlock_irqrestore(&dev->txwait_lock, flags);
 			hfi1_qp_wakeup(qp, RVT_S_WAIT_TX);
 			break;
 		}
-	} while (read_seqretry(&dev->iowait_lock, seq));
+	} while (read_seqretry(&dev->txwait_lock, seq));
 }
 
 struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
@@ -96,7 +96,7 @@
 {
 	struct verbs_txreq *tx = ERR_PTR(-EBUSY);
 
-	write_seqlock(&dev->iowait_lock);
+	write_seqlock(&dev->txwait_lock);
 	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 		struct hfi1_qp_priv *priv;
 
@@ -108,13 +108,14 @@
 			dev->n_txwait++;
 			qp->s_flags |= RVT_S_WAIT_TX;
 			list_add_tail(&priv->s_iowait.list, &dev->txwait);
+			priv->s_iowait.lock = &dev->txwait_lock;
 			trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX);
 			rvt_get_qp(qp);
 		}
 		qp->s_flags &= ~RVT_S_BUSY;
 	}
 out:
-	write_sequnlock(&dev->iowait_lock);
+	write_sequnlock(&dev->txwait_lock);
 	return tx;
 }
 
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 5660897..76216f2 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -65,6 +65,7 @@
 	struct sdma_engine     *sde;
 	struct send_context     *psc;
 	u16                     hdr_dwords;
+	u16			s_cur_size;
 };
 
 struct hfi1_ibdev;
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 24f79ee..0ac294d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -39,7 +39,8 @@
 #define HNS_ROCE_VLAN_SL_BIT_MASK	7
 #define HNS_ROCE_VLAN_SL_SHIFT		13
 
-struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr)
+struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr,
+				 struct ib_udata *udata)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device);
 	struct device *dev = &hr_dev->pdev->dev;
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index 863a17a..605962f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -61,9 +61,10 @@
 	return ret;
 }
 
-void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj)
+void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj,
+			  int rr)
 {
-	hns_roce_bitmap_free_range(bitmap, obj, 1);
+	hns_roce_bitmap_free_range(bitmap, obj, 1, rr);
 }
 
 int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt,
@@ -106,7 +107,8 @@
 }
 
 void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap,
-				unsigned long obj, int cnt)
+				unsigned long obj, int cnt,
+				int rr)
 {
 	int i;
 
@@ -116,7 +118,8 @@
 	for (i = 0; i < cnt; i++)
 		clear_bit(obj + i, bitmap->table);
 
-	bitmap->last = min(bitmap->last, obj);
+	if (!rr)
+		bitmap->last = min(bitmap->last, obj);
 	bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
 		       & bitmap->mask;
 	spin_unlock(&bitmap->lock);
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c
index 2a0b6c0..8c1f7a6f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -216,10 +216,10 @@
 		goto out;
 
 	/*
-	* It is timeout when wait_for_completion_timeout return 0
-	* The return value is the time limit set in advance
-	* how many seconds showing
-	*/
+	 * wait_for_completion_timeout() returns 0 if the wait timed out.
+	 * Otherwise it returns the time remaining (in jiffies) before the
+	 * limit set in advance would have expired.
+	 */
 	if (!wait_for_completion_timeout(&context->done,
 					 msecs_to_jiffies(timeout))) {
 		dev_err(dev, "[cmd]wait_for_completion_timeout timeout\n");
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h
index e3997d3..f5a9ee2 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.h
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -34,6 +34,7 @@
 #define _HNS_ROCE_CMD_H
 
 #define HNS_ROCE_MAILBOX_SIZE		4096
+#define HNS_ROCE_CMD_TIMEOUT_MSECS	10000
 
 enum {
 	/* TPT commands */
@@ -57,17 +58,6 @@
 	HNS_ROCE_CMD_QUERY_QP		= 0x22,
 };
 
-enum {
-	HNS_ROCE_CMD_TIME_CLASS_A	= 10000,
-	HNS_ROCE_CMD_TIME_CLASS_B	= 10000,
-	HNS_ROCE_CMD_TIME_CLASS_C	= 10000,
-};
-
-struct hns_roce_cmd_mailbox {
-	void		       *buf;
-	dma_addr_t		dma;
-};
-
 int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
 		      unsigned long in_modifier, u8 op_modifier, u16 op,
 		      unsigned long timeout);
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
index 2970161..4af403e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -57,6 +57,32 @@
 #define roce_set_bit(origin, shift, val) \
 	roce_set_field((origin), (1ul << (shift)), (shift), (val))
 
+/*
+ * roce_hw_index_cmp_lt - Compare two hardware index values in hisilicon
+ *                        SOC, check if a is less than b.
+ * @a: hardware index value
+ * @b: hardware index value
+ * @bits: the number of bits of a and b, range: 0~31.
+ *
+ * Hardware index increases continuously till max value, and then restart
+ * from zero, again and again. Because the bits of reg field is often
+ * limited, the reg field can only hold the low bits of the hardware index
+ * in hisilicon SOC.
+ * In some cases we need to compare two values (a, b) read from two reg
+ * fields in this driver, for example:
+ * If a equals 0xfffe, b equals 0x1 and bits equals 16, we think b has
+ * increased from 0xffff to 0x1 and a is less than b.
+ * If a equals 0xfffe, b equals 0xf001 and bits equals 16, we think a
+ * is bigger than b.
+ *
+ * Return true on a less than b, otherwise false.
+ */
+#define roce_hw_index_mask(bits)	((1ul << (bits)) - 1)
+#define roce_hw_index_shift(bits)	(32 - (bits))
+#define roce_hw_index_cmp_lt(a, b, bits) \
+	((int)((((a) - (b)) & roce_hw_index_mask(bits)) << \
+		roce_hw_index_shift(bits)) < 0)
+
 #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3
 #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4
 
@@ -245,16 +271,26 @@
 #define ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M   \
 	(((1UL << 28) - 1) << ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)
 
+#define ROCEE_SDB_PTR_CMP_BITS 28
+
 #define ROCEE_SDB_INV_CNT_SDB_INV_CNT_S 0
 #define ROCEE_SDB_INV_CNT_SDB_INV_CNT_M   \
 	(((1UL << 16) - 1) << ROCEE_SDB_INV_CNT_SDB_INV_CNT_S)
 
+#define ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S	0
+#define ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M	\
+	(((1UL << 16) - 1) << ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S)
+
+#define ROCEE_SDB_CNT_CMP_BITS 16
+
+#define ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S	20
+
+#define ROCEE_CNT_CLR_CE_CNT_CLR_CE_S 0
+
 /*************ROCEE_REG DEFINITION****************/
 #define ROCEE_VENDOR_ID_REG			0x0
 #define ROCEE_VENDOR_PART_ID_REG		0x4
 
-#define ROCEE_HW_VERSION_REG			0x8
-
 #define ROCEE_SYS_IMAGE_GUID_L_REG		0xC
 #define ROCEE_SYS_IMAGE_GUID_H_REG		0x10
 
@@ -318,7 +354,11 @@
 
 #define ROCEE_SDB_ISSUE_PTR_REG			0x758
 #define ROCEE_SDB_SEND_PTR_REG			0x75C
+#define ROCEE_CAEP_CQE_WCMD_EMPTY		0x850
+#define ROCEE_SCAEP_WR_CQE_CNT			0x8D0
 #define ROCEE_SDB_INV_CNT_REG			0x9A4
+#define ROCEE_SDB_RETRY_CNT_REG			0x9AC
+#define ROCEE_TSP_BP_ST_REG			0x9EC
 #define ROCEE_ECC_UCERR_ALM0_REG		0xB34
 #define ROCEE_ECC_CERR_ALM0_REG			0xB40
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 0973659..589496c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -35,7 +35,7 @@
 #include "hns_roce_device.h"
 #include "hns_roce_cmd.h"
 #include "hns_roce_hem.h"
-#include "hns_roce_user.h"
+#include <rdma/hns-abi.h>
 #include "hns_roce_common.h"
 
 static void hns_roce_ib_cq_comp(struct hns_roce_cq *hr_cq)
@@ -77,7 +77,7 @@
 			     unsigned long cq_num)
 {
 	return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cq_num, 0,
-			    HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIME_CLASS_A);
+			    HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIMEOUT_MSECS);
 }
 
 static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent,
@@ -166,7 +166,7 @@
 	hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
 
 err_out:
-	hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn);
+	hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR);
 	return ret;
 }
 
@@ -176,11 +176,10 @@
 {
 	return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, cq_num,
 				 mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_CQ,
-				 HNS_ROCE_CMD_TIME_CLASS_A);
+				 HNS_ROCE_CMD_TIMEOUT_MSECS);
 }
 
-static void hns_roce_free_cq(struct hns_roce_dev *hr_dev,
-			     struct hns_roce_cq *hr_cq)
+void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
 {
 	struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
 	struct device *dev = &hr_dev->pdev->dev;
@@ -204,7 +203,7 @@
 	spin_unlock_irq(&cq_table->lock);
 
 	hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
-	hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn);
+	hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR);
 }
 
 static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev,
@@ -349,6 +348,15 @@
 		goto err_mtt;
 	}
 
+	/*
+	 * For the QP created by kernel space, tptr value should be initialized
+	 * to zero; for the QP created by user space, it will cause
+	 * synchronization problems if tptr is set to zero here, so we
+	 * initialize it in user space.
+	 */
+	if (!context)
+		*hr_cq->tptr_addr = 0;
+
 	/* Get created cq handler and carry out event */
 	hr_cq->comp = hns_roce_ib_cq_comp;
 	hr_cq->event = hns_roce_ib_cq_event;
@@ -383,19 +391,25 @@
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
 	struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
+	int ret = 0;
 
-	hns_roce_free_cq(hr_dev, hr_cq);
-	hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
+	if (hr_dev->hw->destroy_cq) {
+		ret = hr_dev->hw->destroy_cq(ib_cq);
+	} else {
+		hns_roce_free_cq(hr_dev, hr_cq);
+		hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
 
-	if (ib_cq->uobject)
-		ib_umem_release(hr_cq->umem);
-	else
-		/* Free the buff of stored cq */
-		hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, ib_cq->cqe);
+		if (ib_cq->uobject)
+			ib_umem_release(hr_cq->umem);
+		else
+			/* Free the buff of stored cq */
+			hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf,
+						ib_cq->cqe);
 
-	kfree(hr_cq);
+		kfree(hr_cq);
+	}
 
-	return 0;
+	return ret;
 }
 
 void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 3417315..1a6cb5d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -37,6 +37,8 @@
 
 #define DRV_NAME "hns_roce"
 
+#define HNS_ROCE_HW_VER1	('h' << 24 | 'i' << 16 | '0' << 8 | '6')
+
 #define MAC_ADDR_OCTET_NUM			6
 #define HNS_ROCE_MAX_MSG_LEN			0x80000000
 
@@ -54,6 +56,12 @@
 #define HNS_ROCE_MAX_INNER_MTPT_NUM		0x7
 #define HNS_ROCE_MAX_MTPT_PBL_NUM		0x100000
 
+#define HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS	20
+#define HNS_ROCE_MAX_FREE_CQ_WAIT_CNT	\
+	(5000 / HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS)
+#define HNS_ROCE_CQE_WCMD_EMPTY_BIT		0x2
+#define HNS_ROCE_MIN_CQE_CNT			16
+
 #define HNS_ROCE_MAX_IRQ_NUM			34
 
 #define HNS_ROCE_COMP_VEC_NUM			32
@@ -70,6 +78,9 @@
 #define HNS_ROCE_MAX_GID_NUM			16
 #define HNS_ROCE_GID_SIZE			16
 
+#define BITMAP_NO_RR				0
+#define BITMAP_RR				1
+
 #define MR_TYPE_MR				0x00
 #define MR_TYPE_DMA				0x03
 
@@ -196,9 +207,9 @@
 /* Order = 0: bitmap is biggest, order = max bitmap is least (only a bit) */
 /* Every bit repesent to a partner free/used status in bitmap */
 /*
-* Initial, bits of other bitmap are all 0 except that a bit of max_order is 1
-* Bit = 1 represent to idle and available; bit = 0: not available
-*/
+ * Initial, bits of other bitmap are all 0 except that a bit of max_order is 1
+ * Bit = 1 represent to idle and available; bit = 0: not available
+ */
 struct hns_roce_buddy {
 	/* Members point to every order level bitmap */
 	unsigned long **bits;
@@ -296,7 +307,7 @@
 	u32				cq_depth;
 	u32				cons_index;
 	void __iomem			*cq_db_l;
-	void __iomem			*tptr_addr;
+	u16				*tptr_addr;
 	unsigned long			cqn;
 	u32				vector;
 	atomic_t			refcount;
@@ -360,29 +371,34 @@
 	struct mutex		hcr_mutex;
 	struct semaphore	poll_sem;
 	/*
-	* Event mode: cmd register mutex protection,
-	* ensure to not exceed max_cmds and user use limit region
-	*/
+	 * Event mode: cmd register mutex protection,
+	 * ensure to not exceed max_cmds and user use limit region
+	 */
 	struct semaphore	event_sem;
 	int			max_cmds;
 	spinlock_t		context_lock;
 	int			free_head;
 	struct hns_roce_cmd_context *context;
 	/*
-	* Result of get integer part
-	* which max_comds compute according a power of 2
-	*/
+	 * Result of get integer part
+	 * which max_comds compute according a power of 2
+	 */
 	u16			token_mask;
 	/*
-	* Process whether use event mode, init default non-zero
-	* After the event queue of cmd event ready,
-	* can switch into event mode
-	* close device, switch into poll mode(non event mode)
-	*/
+	 * Process whether use event mode, init default non-zero
+	 * After the event queue of cmd event ready,
+	 * can switch into event mode
+	 * close device, switch into poll mode(non event mode)
+	 */
 	u8			use_events;
 	u8			toggle;
 };
 
+struct hns_roce_cmd_mailbox {
+	void		       *buf;
+	dma_addr_t		dma;
+};
+
 struct hns_roce_dev;
 
 struct hns_roce_qp {
@@ -424,8 +440,6 @@
 	struct net_device      *netdevs[HNS_ROCE_MAX_PORTS];
 	struct notifier_block	nb;
 	struct notifier_block	nb_inet;
-	/* 16 GID is shared by 6 port in v1 engine. */
-	union ib_gid		gid_table[HNS_ROCE_MAX_GID_NUM];
 	u8			phy_port[HNS_ROCE_MAX_PORTS];
 };
 
@@ -519,6 +533,8 @@
 			 struct ib_recv_wr **bad_recv_wr);
 	int (*req_notify_cq)(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
 	int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+	int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr);
+	int (*destroy_cq)(struct ib_cq *ibcq);
 	void	*priv;
 };
 
@@ -553,6 +569,8 @@
 
 	int			cmd_mod;
 	int			loop_idc;
+	dma_addr_t		tptr_dma_addr; /*only for hw v1*/
+	u32			tptr_size; /*only for hw v1*/
 	struct hns_roce_hw	*hw;
 };
 
@@ -657,7 +675,8 @@
 void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev);
 
 int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj);
-void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj);
+void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj,
+			 int rr);
 int hns_roce_bitmap_init(struct hns_roce_bitmap *bitmap, u32 num, u32 mask,
 			 u32 reserved_bot, u32 resetrved_top);
 void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap);
@@ -665,9 +684,11 @@
 int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt,
 				int align, unsigned long *obj);
 void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap,
-				unsigned long obj, int cnt);
+				unsigned long obj, int cnt,
+				int rr);
 
-struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+				 struct ib_udata *udata);
 int hns_roce_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
 int hns_roce_destroy_ah(struct ib_ah *ah);
 
@@ -681,6 +702,10 @@
 				   u64 virt_addr, int access_flags,
 				   struct ib_udata *udata);
 int hns_roce_dereg_mr(struct ib_mr *ibmr);
+int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
+		       struct hns_roce_cmd_mailbox *mailbox,
+		       unsigned long mpt_index);
+unsigned long key_to_hw_index(u32 key);
 
 void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
 		       struct hns_roce_buf *buf);
@@ -717,6 +742,7 @@
 				    struct ib_udata *udata);
 
 int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq);
+void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq);
 
 void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
 void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.c b/drivers/infiniband/hw/hns/hns_roce_eq.c
index 21e21b0..50f8649 100644
--- a/drivers/infiniband/hw/hns/hns_roce_eq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_eq.c
@@ -371,9 +371,9 @@
 	int i = 0;
 
 	/**
-	* AEQ overflow ECC mult bit err CEQ overflow alarm
-	* must clear interrupt, mask irq, clear irq, cancel mask operation
-	*/
+	 * AEQ overflow ECC mult bit err CEQ overflow alarm
+	 * must clear interrupt, mask irq, clear irq, cancel mask operation
+	 */
 	aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG);
 
 	if (roce_get_bit(aeshift_val,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index 250d8f2..c5104e0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -80,9 +80,9 @@
 			--order;
 
 		/*
-		* Alloc memory one time. If failed, don't alloc small block
-		* memory, directly return fail.
-		*/
+		 * Alloc memory one time. If failed, don't alloc small block
+		 * memory, directly return fail.
+		 */
 		mem = &chunk->mem[chunk->npages];
 		buf = dma_alloc_coherent(&hr_dev->pdev->dev, PAGE_SIZE << order,
 				&sg_dma_address(mem), gfp_mask);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 71232e5..b8111b0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -32,6 +32,7 @@
 
 #include <linux/platform_device.h>
 #include <linux/acpi.h>
+#include <linux/etherdevice.h>
 #include <rdma/ib_umem.h>
 #include "hns_roce_common.h"
 #include "hns_roce_device.h"
@@ -72,6 +73,8 @@
 	int nreq = 0;
 	u32 ind = 0;
 	int ret = 0;
+	u8 *smac;
+	int loopback;
 
 	if (unlikely(ibqp->qp_type != IB_QPT_GSI &&
 		ibqp->qp_type != IB_QPT_RC)) {
@@ -129,6 +132,14 @@
 				       UD_SEND_WQE_U32_8_DMAC_5_M,
 				       UD_SEND_WQE_U32_8_DMAC_5_S,
 				       ah->av.mac[5]);
+
+			smac = (u8 *)hr_dev->dev_addr[qp->port];
+			loopback = ether_addr_equal_unaligned(ah->av.mac,
+							      smac) ? 1 : 0;
+			roce_set_bit(ud_sq_wqe->u32_8,
+				     UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S,
+				     loopback);
+
 			roce_set_field(ud_sq_wqe->u32_8,
 				       UD_SEND_WQE_U32_8_OPERATION_TYPE_M,
 				       UD_SEND_WQE_U32_8_OPERATION_TYPE_S,
@@ -284,6 +295,8 @@
 		roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SQ_HEAD_M,
 			       SQ_DOORBELL_U32_4_SQ_HEAD_S,
 			      (qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1)));
+		roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SL_M,
+			       SQ_DOORBELL_U32_4_SL_S, qp->sl);
 		roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_PORT_M,
 			       SQ_DOORBELL_U32_4_PORT_S, qp->phy_port);
 		roce_set_field(sq_db.u32_8, SQ_DOORBELL_U32_8_QPN_M,
@@ -611,6 +624,213 @@
 	return ret;
 }
 
+static struct hns_roce_qp *hns_roce_v1_create_lp_qp(struct hns_roce_dev *hr_dev,
+						    struct ib_pd *pd)
+{
+	struct device *dev = &hr_dev->pdev->dev;
+	struct ib_qp_init_attr init_attr;
+	struct ib_qp *qp;
+
+	memset(&init_attr, 0, sizeof(struct ib_qp_init_attr));
+	init_attr.qp_type		= IB_QPT_RC;
+	init_attr.sq_sig_type		= IB_SIGNAL_ALL_WR;
+	init_attr.cap.max_recv_wr	= HNS_ROCE_MIN_WQE_NUM;
+	init_attr.cap.max_send_wr	= HNS_ROCE_MIN_WQE_NUM;
+
+	qp = hns_roce_create_qp(pd, &init_attr, NULL);
+	if (IS_ERR(qp)) {
+		dev_err(dev, "Create loop qp for mr free failed!\n");
+		return NULL;
+	}
+
+	return to_hr_qp(qp);
+}
+
+static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_caps *caps = &hr_dev->caps;
+	struct device *dev = &hr_dev->pdev->dev;
+	struct ib_cq_init_attr cq_init_attr;
+	struct hns_roce_free_mr *free_mr;
+	struct ib_qp_attr attr = { 0 };
+	struct hns_roce_v1_priv *priv;
+	struct hns_roce_qp *hr_qp;
+	struct ib_cq *cq;
+	struct ib_pd *pd;
+	u64 subnet_prefix;
+	int attr_mask = 0;
+	int i;
+	int ret;
+	u8 phy_port;
+	u8 sl;
+
+	priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+	free_mr = &priv->free_mr;
+
+	/* Reserved cq for loop qp */
+	cq_init_attr.cqe		= HNS_ROCE_MIN_WQE_NUM * 2;
+	cq_init_attr.comp_vector	= 0;
+	cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL, NULL);
+	if (IS_ERR(cq)) {
+		dev_err(dev, "Create cq for reseved loop qp failed!\n");
+		return -ENOMEM;
+	}
+	free_mr->mr_free_cq = to_hr_cq(cq);
+	free_mr->mr_free_cq->ib_cq.device		= &hr_dev->ib_dev;
+	free_mr->mr_free_cq->ib_cq.uobject		= NULL;
+	free_mr->mr_free_cq->ib_cq.comp_handler		= NULL;
+	free_mr->mr_free_cq->ib_cq.event_handler	= NULL;
+	free_mr->mr_free_cq->ib_cq.cq_context		= NULL;
+	atomic_set(&free_mr->mr_free_cq->ib_cq.usecnt, 0);
+
+	pd = hns_roce_alloc_pd(&hr_dev->ib_dev, NULL, NULL);
+	if (IS_ERR(pd)) {
+		dev_err(dev, "Create pd for reseved loop qp failed!\n");
+		ret = -ENOMEM;
+		goto alloc_pd_failed;
+	}
+	free_mr->mr_free_pd = to_hr_pd(pd);
+	free_mr->mr_free_pd->ibpd.device  = &hr_dev->ib_dev;
+	free_mr->mr_free_pd->ibpd.uobject = NULL;
+	atomic_set(&free_mr->mr_free_pd->ibpd.usecnt, 0);
+
+	attr.qp_access_flags	= IB_ACCESS_REMOTE_WRITE;
+	attr.pkey_index		= 0;
+	attr.min_rnr_timer	= 0;
+	/* Disable read ability */
+	attr.max_dest_rd_atomic = 0;
+	attr.max_rd_atomic	= 0;
+	/* Use arbitrary values as rq_psn and sq_psn */
+	attr.rq_psn		= 0x0808;
+	attr.sq_psn		= 0x0808;
+	attr.retry_cnt		= 7;
+	attr.rnr_retry		= 7;
+	attr.timeout		= 0x12;
+	attr.path_mtu		= IB_MTU_256;
+	attr.ah_attr.ah_flags		= 1;
+	attr.ah_attr.static_rate	= 3;
+	attr.ah_attr.grh.sgid_index	= 0;
+	attr.ah_attr.grh.hop_limit	= 1;
+	attr.ah_attr.grh.flow_label	= 0;
+	attr.ah_attr.grh.traffic_class	= 0;
+
+	subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
+	for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
+		free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd);
+		/* The helper reports failure with NULL, not an ERR_PTR */
+		if (!free_mr->mr_free_qp[i]) {
+			dev_err(dev, "Create loop qp failed!\n");
+			goto create_lp_qp_failed;
+		}
+		hr_qp = free_mr->mr_free_qp[i];
+
+		sl = i / caps->num_ports;
+
+		if (caps->num_ports == HNS_ROCE_MAX_PORTS)
+			phy_port = (i >= HNS_ROCE_MAX_PORTS) ? (i - 2) :
+				(i % caps->num_ports);
+		else
+			phy_port = i % caps->num_ports;
+
+		hr_qp->port		= phy_port + 1;
+		hr_qp->phy_port		= phy_port;
+		hr_qp->ibqp.qp_type	= IB_QPT_RC;
+		hr_qp->ibqp.device	= &hr_dev->ib_dev;
+		hr_qp->ibqp.uobject	= NULL;
+		atomic_set(&hr_qp->ibqp.usecnt, 0);
+		hr_qp->ibqp.pd		= pd;
+		hr_qp->ibqp.recv_cq	= cq;
+		hr_qp->ibqp.send_cq	= cq;
+
+		attr.ah_attr.port_num	= phy_port + 1;
+		attr.ah_attr.sl		= sl;
+		attr.port_num		= phy_port + 1;
+
+		attr.dest_qp_num	= hr_qp->qpn;
+		memcpy(attr.ah_attr.dmac, hr_dev->dev_addr[phy_port],
+		       MAC_ADDR_OCTET_NUM);
+
+		memcpy(attr.ah_attr.grh.dgid.raw, &subnet_prefix, sizeof(u64));
+		memcpy(&attr.ah_attr.grh.dgid.raw[8],
+		       hr_dev->dev_addr[phy_port], 3);
+		memcpy(&attr.ah_attr.grh.dgid.raw[13],
+		       hr_dev->dev_addr[phy_port] + 3, 3);
+		attr.ah_attr.grh.dgid.raw[11] = 0xff;
+		attr.ah_attr.grh.dgid.raw[12] = 0xfe;
+		attr.ah_attr.grh.dgid.raw[8] ^= 2;
+
+		attr_mask |= IB_QP_PORT;
+
+		ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask,
+					    IB_QPS_RESET, IB_QPS_INIT);
+		if (ret) {
+			dev_err(dev, "modify qp failed(%d)!\n", ret);
+			goto create_lp_qp_failed;
+		}
+
+		ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask,
+					    IB_QPS_INIT, IB_QPS_RTR);
+		if (ret) {
+			dev_err(dev, "modify qp failed(%d)!\n", ret);
+			goto create_lp_qp_failed;
+		}
+
+		ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask,
+					    IB_QPS_RTR, IB_QPS_RTS);
+		if (ret) {
+			dev_err(dev, "modify qp failed(%d)!\n", ret);
+			goto create_lp_qp_failed;
+		}
+	}
+
+	return 0;
+
+create_lp_qp_failed:
+	for (i -= 1; i >= 0; i--) {
+		hr_qp = free_mr->mr_free_qp[i];
+		if (hns_roce_v1_destroy_qp(&hr_qp->ibqp))
+			dev_err(dev, "Destroy qp %d for mr free failed!\n", i);
+	}
+
+	if (hns_roce_dealloc_pd(pd))
+		dev_err(dev, "Destroy pd for create_lp_qp failed!\n");
+
+alloc_pd_failed:
+	if (hns_roce_ib_destroy_cq(cq))
+		dev_err(dev, "Destroy cq for create_lp_qp failed!\n");
+
+	return -EINVAL;
+}
+
+static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev)
+{
+	struct device *dev = &hr_dev->pdev->dev;
+	struct hns_roce_free_mr *free_mr;
+	struct hns_roce_v1_priv *priv;
+	struct hns_roce_qp *hr_qp;
+	int ret;
+	int i;
+
+	priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+	free_mr = &priv->free_mr;
+
+	for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
+		hr_qp = free_mr->mr_free_qp[i];
+		ret = hns_roce_v1_destroy_qp(&hr_qp->ibqp);
+		if (ret)
+			dev_err(dev, "Destroy qp %d for mr free failed(%d)!\n",
+				i, ret);
+	}
+
+	ret = hns_roce_ib_destroy_cq(&free_mr->mr_free_cq->ib_cq);
+	if (ret)
+		dev_err(dev, "Destroy cq for mr_free failed(%d)!\n", ret);
+
+	ret = hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd);
+	if (ret)
+		dev_err(dev, "Destroy pd for mr_free failed(%d)!\n", ret);
+}
+
 static int hns_roce_db_init(struct hns_roce_dev *hr_dev)
 {
 	struct device *dev = &hr_dev->pdev->dev;
@@ -648,6 +868,223 @@
 	return 0;
 }
 
+void hns_roce_v1_recreate_lp_qp_work_fn(struct work_struct *work)
+{
+	struct hns_roce_recreate_lp_qp_work *lp_qp_work;
+	struct hns_roce_dev *hr_dev;
+
+	lp_qp_work = container_of(work, struct hns_roce_recreate_lp_qp_work,
+				  work);
+	hr_dev = to_hr_dev(lp_qp_work->ib_dev);
+
+	hns_roce_v1_release_lp_qp(hr_dev);
+
+	if (hns_roce_v1_rsv_lp_qp(hr_dev))
+		dev_err(&hr_dev->pdev->dev, "create reserver qp failed\n");
+
+	if (lp_qp_work->comp_flag)
+		complete(lp_qp_work->comp);
+
+	kfree(lp_qp_work);
+}
+
+static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev)
+{
+	struct device *dev = &hr_dev->pdev->dev;
+	struct hns_roce_recreate_lp_qp_work *lp_qp_work;
+	struct hns_roce_free_mr *free_mr;
+	struct hns_roce_v1_priv *priv;
+	struct completion comp;
+	unsigned long end =
+	  msecs_to_jiffies(HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS) + jiffies;
+
+	priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+	free_mr = &priv->free_mr;
+
+	lp_qp_work = kzalloc(sizeof(*lp_qp_work), GFP_KERNEL);
+	if (!lp_qp_work)
+		return -ENOMEM;
+
+	INIT_WORK(&(lp_qp_work->work), hns_roce_v1_recreate_lp_qp_work_fn);
+
+	lp_qp_work->ib_dev = &(hr_dev->ib_dev);
+	lp_qp_work->comp = &comp;
+	lp_qp_work->comp_flag = 1;
+	init_completion(lp_qp_work->comp);
+
+	queue_work(free_mr->free_mr_wq, &(lp_qp_work->work));
+
+	while (time_before_eq(jiffies, end)) {
+		if (try_wait_for_completion(&comp))
+			return 0;
+		msleep(HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE);
+	}
+	/* Tell the worker not to signal the on-stack completion any more. */
+	lp_qp_work->comp_flag = 0;
+	if (try_wait_for_completion(&comp))
+		return 0;
+
+	dev_warn(dev, "recreate lp qp failed 20s timeout and return failed!\n");
+	return -ETIMEDOUT;
+}
+
+static int hns_roce_v1_send_lp_wqe(struct hns_roce_qp *hr_qp)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
+	struct device *dev = &hr_dev->pdev->dev;
+	struct ib_send_wr send_wr, *bad_wr;
+	int ret;
+
+	memset(&send_wr, 0, sizeof(send_wr));
+	send_wr.next	= NULL;
+	send_wr.num_sge	= 0;
+	send_wr.send_flags = 0;
+	send_wr.sg_list	= NULL;
+	send_wr.wr_id	= (unsigned long long)&send_wr;
+	send_wr.opcode	= IB_WR_RDMA_WRITE;
+
+	ret = hns_roce_v1_post_send(&hr_qp->ibqp, &send_wr, &bad_wr);
+	if (ret) {
+		dev_err(dev, "Post write wqe for mr free failed(%d)!\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void hns_roce_v1_mr_free_work_fn(struct work_struct *work)
+{
+	struct hns_roce_mr_free_work *mr_work;
+	struct ib_wc wc[HNS_ROCE_V1_RESV_QP];
+	struct hns_roce_free_mr *free_mr;
+	struct hns_roce_cq *mr_free_cq;
+	struct hns_roce_v1_priv *priv;
+	struct hns_roce_dev *hr_dev;
+	struct hns_roce_mr *hr_mr;
+	struct hns_roce_qp *hr_qp;
+	struct device *dev;
+	unsigned long end =
+		msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies;
+	int i;
+	int ret;
+	int ne;
+
+	mr_work = container_of(work, struct hns_roce_mr_free_work, work);
+	hr_mr = (struct hns_roce_mr *)mr_work->mr;
+	hr_dev = to_hr_dev(mr_work->ib_dev);
+	dev = &hr_dev->pdev->dev;
+
+	priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+	free_mr = &priv->free_mr;
+	mr_free_cq = free_mr->mr_free_cq;
+
+	for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
+		hr_qp = free_mr->mr_free_qp[i];
+		ret = hns_roce_v1_send_lp_wqe(hr_qp);
+		if (ret) {
+			dev_err(dev,
+			     "Send wqe (qp:0x%lx) for mr free failed(%d)!\n",
+			     hr_qp->qpn, ret);
+			goto free_work;
+		}
+	}
+
+	ne = HNS_ROCE_V1_RESV_QP;
+	do {
+		ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc);
+		if (ret < 0) {
+			dev_err(dev,
+			   "(qp:0x%lx) starts, Poll cqe failed(%d) for mr 0x%x free! Remain %d cqe\n",
+			   hr_qp->qpn, ret, hr_mr->key, ne);
+			goto free_work;
+		}
+		ne -= ret;
+		msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE);
+	} while (ne && time_before_eq(jiffies, end));
+
+	if (ne != 0)
+		dev_err(dev,
+			"Poll cqe for mr 0x%x free timeout! Remain %d cqe\n",
+			hr_mr->key, ne);
+
+free_work:
+	if (mr_work->comp_flag)
+		complete(mr_work->comp);
+	kfree(mr_work);
+}
+
+int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
+{
+	struct device *dev = &hr_dev->pdev->dev;
+	struct hns_roce_mr_free_work *mr_work;
+	struct hns_roce_free_mr *free_mr;
+	struct hns_roce_v1_priv *priv;
+	struct completion comp;
+	unsigned long end =
+		msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies;
+	unsigned long start = jiffies;
+	int npages;
+	int ret = 0;
+
+	priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+	free_mr = &priv->free_mr;
+
+	if (mr->enabled) {
+		if (hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key)
+				       & (hr_dev->caps.num_mtpts - 1)))
+			dev_warn(dev, "HW2SW_MPT failed!\n");
+	}
+
+	mr_work = kzalloc(sizeof(*mr_work), GFP_KERNEL);
+	if (!mr_work) {
+		ret = -ENOMEM;
+		goto free_mr;
+	}
+
+	INIT_WORK(&(mr_work->work), hns_roce_v1_mr_free_work_fn);
+
+	mr_work->ib_dev = &(hr_dev->ib_dev);
+	mr_work->comp = &comp;
+	mr_work->comp_flag = 1;
+	mr_work->mr = (void *)mr;
+	init_completion(mr_work->comp);
+
+	queue_work(free_mr->free_mr_wq, &(mr_work->work));
+
+	while (time_before_eq(jiffies, end)) {
+		if (try_wait_for_completion(&comp))
+			goto free_mr;
+		msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE);
+	}
+
+	mr_work->comp_flag = 0;
+	if (try_wait_for_completion(&comp))
+		goto free_mr;
+
+	dev_warn(dev, "Free mr work 0x%x over 50s and failed!\n", mr->key);
+	ret = -ETIMEDOUT;
+
+free_mr:
+	dev_dbg(dev, "Free mr 0x%x use 0x%x us.\n",
+		mr->key, jiffies_to_usecs(jiffies) - jiffies_to_usecs(start));
+
+	if (mr->size != ~0ULL) {
+		npages = ib_umem_page_count(mr->umem);
+		dma_free_coherent(dev, npages * 8, mr->pbl_buf,
+				  mr->pbl_dma_addr);
+	}
+
+	hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
+			     key_to_hw_index(mr->key), 0);
+
+	if (mr->umem)
+		ib_umem_release(mr->umem);
+
+	kfree(mr);
+
+	return ret;
+}
+
 static void hns_roce_db_free(struct hns_roce_dev *hr_dev)
 {
 	struct device *dev = &hr_dev->pdev->dev;
@@ -849,6 +1286,85 @@
 		priv->bt_table.qpc_buf.buf, priv->bt_table.qpc_buf.map);
 }
 
+static int hns_roce_tptr_init(struct hns_roce_dev *hr_dev)
+{
+	struct device *dev = &hr_dev->pdev->dev;
+	struct hns_roce_buf_list *tptr_buf;
+	struct hns_roce_v1_priv *priv;
+
+	priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+	tptr_buf = &priv->tptr_table.tptr_buf;
+
+	/*
+	 * This buffer will be used for CQ's tptr(tail pointer), also
+	 * named ci(consumer index). Every CQ will use 2 bytes to save
+	 * cqe ci in hip06. Hardware will read this area to get new ci
+	 * when the queue is almost full.
+	 */
+	tptr_buf->buf = dma_alloc_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE,
+					   &tptr_buf->map, GFP_KERNEL);
+	if (!tptr_buf->buf)
+		return -ENOMEM;
+
+	hr_dev->tptr_dma_addr = tptr_buf->map;
+	hr_dev->tptr_size = HNS_ROCE_V1_TPTR_BUF_SIZE;
+
+	return 0;
+}
+
+static void hns_roce_tptr_free(struct hns_roce_dev *hr_dev)
+{
+	struct device *dev = &hr_dev->pdev->dev;
+	struct hns_roce_buf_list *tptr_buf;
+	struct hns_roce_v1_priv *priv;
+
+	priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+	tptr_buf = &priv->tptr_table.tptr_buf;
+
+	dma_free_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE,
+			  tptr_buf->buf, tptr_buf->map);
+}
+
+static int hns_roce_free_mr_init(struct hns_roce_dev *hr_dev)
+{
+	struct device *dev = &hr_dev->pdev->dev;
+	struct hns_roce_free_mr *free_mr;
+	struct hns_roce_v1_priv *priv;
+	int ret = 0;
+
+	priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+	free_mr = &priv->free_mr;
+
+	free_mr->free_mr_wq = create_singlethread_workqueue("hns_roce_free_mr");
+	if (!free_mr->free_mr_wq) {
+		dev_err(dev, "Create free mr workqueue failed!\n");
+		return -ENOMEM;
+	}
+
+	ret = hns_roce_v1_rsv_lp_qp(hr_dev);
+	if (ret) {
+		dev_err(dev, "Reserved loop qp failed(%d)!\n", ret);
+		flush_workqueue(free_mr->free_mr_wq);
+		destroy_workqueue(free_mr->free_mr_wq);
+	}
+
+	return ret;
+}
+
+static void hns_roce_free_mr_free(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_free_mr *free_mr;
+	struct hns_roce_v1_priv *priv;
+
+	priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+	free_mr = &priv->free_mr;
+
+	flush_workqueue(free_mr->free_mr_wq);
+	destroy_workqueue(free_mr->free_mr_wq);
+
+	hns_roce_v1_release_lp_qp(hr_dev);
+}
+
 /**
  * hns_roce_v1_reset - reset RoCE
  * @hr_dev: RoCE device struct pointer
@@ -898,6 +1414,38 @@
 	return ret;
 }
 
+static int hns_roce_des_qp_init(struct hns_roce_dev *hr_dev)
+{
+	struct device *dev = &hr_dev->pdev->dev;
+	struct hns_roce_v1_priv *priv;
+	struct hns_roce_des_qp *des_qp;
+
+	priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+	des_qp = &priv->des_qp;
+
+	des_qp->requeue_flag = 1;
+	des_qp->qp_wq = create_singlethread_workqueue("hns_roce_destroy_qp");
+	if (!des_qp->qp_wq) {
+		dev_err(dev, "Create destroy qp workqueue failed!\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void hns_roce_des_qp_free(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_v1_priv *priv;
+	struct hns_roce_des_qp *des_qp;
+
+	priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+	des_qp = &priv->des_qp;
+
+	des_qp->requeue_flag = 0;
+	flush_workqueue(des_qp->qp_wq);
+	destroy_workqueue(des_qp->qp_wq);
+}
+
 void hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
 {
 	int i = 0;
@@ -906,12 +1454,11 @@
 	hr_dev->vendor_id = le32_to_cpu(roce_read(hr_dev, ROCEE_VENDOR_ID_REG));
 	hr_dev->vendor_part_id = le32_to_cpu(roce_read(hr_dev,
 					     ROCEE_VENDOR_PART_ID_REG));
-	hr_dev->hw_rev = le32_to_cpu(roce_read(hr_dev, ROCEE_HW_VERSION_REG));
-
 	hr_dev->sys_image_guid = le32_to_cpu(roce_read(hr_dev,
 					     ROCEE_SYS_IMAGE_GUID_L_REG)) |
 				((u64)le32_to_cpu(roce_read(hr_dev,
 					    ROCEE_SYS_IMAGE_GUID_H_REG)) << 32);
+	hr_dev->hw_rev		= HNS_ROCE_HW_VER1;
 
 	caps->num_qps		= HNS_ROCE_V1_MAX_QP_NUM;
 	caps->max_wqes		= HNS_ROCE_V1_MAX_WQE_NUM;
@@ -1001,18 +1548,44 @@
 		goto error_failed_raq_init;
 	}
 
-	hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_UP);
-
 	ret = hns_roce_bt_init(hr_dev);
 	if (ret) {
 		dev_err(dev, "bt init failed!\n");
 		goto error_failed_bt_init;
 	}
 
+	ret = hns_roce_tptr_init(hr_dev);
+	if (ret) {
+		dev_err(dev, "tptr init failed!\n");
+		goto error_failed_tptr_init;
+	}
+
+	ret = hns_roce_des_qp_init(hr_dev);
+	if (ret) {
+		dev_err(dev, "des qp init failed!\n");
+		goto error_failed_des_qp_init;
+	}
+
+	ret = hns_roce_free_mr_init(hr_dev);
+	if (ret) {
+		dev_err(dev, "free mr init failed!\n");
+		goto error_failed_free_mr_init;
+	}
+
+	hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_UP);
+
 	return 0;
 
+error_failed_free_mr_init:
+	hns_roce_des_qp_free(hr_dev);
+
+error_failed_des_qp_init:
+	hns_roce_tptr_free(hr_dev);
+
+error_failed_tptr_init:
+	hns_roce_bt_free(hr_dev);
+
 error_failed_bt_init:
-	hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN);
 	hns_roce_raq_free(hr_dev);
 
 error_failed_raq_init:
@@ -1022,8 +1595,11 @@
 
 void hns_roce_v1_exit(struct hns_roce_dev *hr_dev)
 {
-	hns_roce_bt_free(hr_dev);
 	hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN);
+	hns_roce_free_mr_free(hr_dev);	/* release in reverse of init order */
+	hns_roce_des_qp_free(hr_dev);
+	hns_roce_tptr_free(hr_dev);
+	hns_roce_bt_free(hr_dev);
 	hns_roce_raq_free(hr_dev);
 	hns_roce_db_free(hr_dev);
 }
@@ -1061,6 +1637,14 @@
 	u32 *p;
 	u32 val;
 
+	/*
+	 * When the MAC address changes, loopback may fail
+	 * because smac no longer equals dmac.
+	 * We need to release the reserved QPs and create them again.
+	 */
+	if (hr_dev->hw->dereg_mr && hns_roce_v1_recreate_lp_qp(hr_dev))
+		dev_warn(&hr_dev->pdev->dev, "recreate lp qp timeout!\n");
+
 	p = (u32 *)(&addr[0]);
 	reg_smac_l = *p;
 	roce_raw_write(reg_smac_l, hr_dev->reg_base + ROCEE_SMAC_L_0_REG +
@@ -1293,9 +1877,9 @@
 	}
 
 	/*
-	* Now backwards through the CQ, removing CQ entries
-	* that match our QP by overwriting them with next entries.
-	*/
+	 * Now backwards through the CQ, removing CQ entries
+	 * that match our QP by overwriting them with next entries.
+	 */
 	while ((int) --prod_index - (int) hr_cq->cons_index >= 0) {
 		cqe = get_cqe(hr_cq, prod_index & hr_cq->ib_cq.cqe);
 		if ((roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
@@ -1317,9 +1901,9 @@
 	if (nfreed) {
 		hr_cq->cons_index += nfreed;
 		/*
-		* Make sure update of buffer contents is done before
-		* updating consumer index.
-		*/
+		 * Make sure update of buffer contents is done before
+		 * updating consumer index.
+		 */
 		wmb();
 
 		hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index);
@@ -1339,14 +1923,21 @@
 			   dma_addr_t dma_handle, int nent, u32 vector)
 {
 	struct hns_roce_cq_context *cq_context = NULL;
-	void __iomem *tptr_addr;
+	struct hns_roce_buf_list *tptr_buf;
+	struct hns_roce_v1_priv *priv;
+	dma_addr_t tptr_dma_addr;
+	int offset;
+
+	priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+	tptr_buf = &priv->tptr_table.tptr_buf;
 
 	cq_context = mb_buf;
 	memset(cq_context, 0, sizeof(*cq_context));
 
-	tptr_addr = 0;
-	hr_dev->priv_addr = tptr_addr;
-	hr_cq->tptr_addr = tptr_addr;
+	/* Get the tptr for this CQ. */
+	offset = hr_cq->cqn * HNS_ROCE_V1_TPTR_ENTRY_SIZE;
+	tptr_dma_addr = tptr_buf->map + offset;
+	hr_cq->tptr_addr = (u16 *)(tptr_buf->buf + offset);
 
 	/* Register cq_context members */
 	roce_set_field(cq_context->cqc_byte_4,
@@ -1390,10 +1981,10 @@
 	roce_set_field(cq_context->cqc_byte_20,
 		       CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M,
 		       CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S,
-		       (u64)tptr_addr >> 44);
+		       tptr_dma_addr >> 44);
 	cq_context->cqc_byte_20 = cpu_to_le32(cq_context->cqc_byte_20);
 
-	cq_context->cqe_tptr_addr_l = (u32)((u64)tptr_addr >> 12);
+	cq_context->cqe_tptr_addr_l = (u32)(tptr_dma_addr >> 12);
 
 	roce_set_field(cq_context->cqc_byte_32,
 		       CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M,
@@ -1407,7 +1998,7 @@
 	roce_set_bit(cq_context->cqc_byte_32,
 		     CQ_CQNTEXT_CQC_BYTE_32_TYPE_OF_COMPLETION_NOTIFICATION_S,
 		     0);
-	/*The initial value of cq's ci is 0 */
+	/* The initial value of cq's ci is 0 */
 	roce_set_field(cq_context->cqc_byte_32,
 		       CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M,
 		       CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0);
@@ -1424,9 +2015,9 @@
 	notification_flag = (flags & IB_CQ_SOLICITED_MASK) ==
 			    IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL;
 	/*
-	* flags = 0; Notification Flag = 1, next
-	* flags = 1; Notification Flag = 0, solocited
-	*/
+	 * flags = 0; Notification Flag = 1, next
+	 * flags = 1; Notification Flag = 0, solicited
+	 */
 	doorbell[0] = hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1);
 	roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1);
 	roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M,
@@ -1581,10 +2172,10 @@
 		wq = &(*cur_qp)->sq;
 		if ((*cur_qp)->sq_signal_bits) {
 			/*
-			* If sg_signal_bit is 1,
-			* firstly tail pointer updated to wqe
-			* which current cqe correspond to
-			*/
+			 * If sq_signal_bits is set,
+			 * first move the tail pointer to the wqe
+			 * that the current cqe corresponds to
+			 */
 			wqe_ctr = (u16)roce_get_field(cqe->cqe_byte_4,
 						      CQE_BYTE_4_WQE_INDEX_M,
 						      CQE_BYTE_4_WQE_INDEX_S);
@@ -1659,8 +2250,14 @@
 			break;
 	}
 
-	if (npolled)
+	if (npolled) {
+		*hr_cq->tptr_addr = hr_cq->cons_index &
+			((hr_cq->cq_depth << 1) - 1);
+
+		/* Memory barrier: tptr must be written before the CI doorbell */
+		wmb();
 		hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index);
+	}
 
 	spin_unlock_irqrestore(&hr_cq->lock, flags);
 
@@ -1799,12 +2396,12 @@
 	if (op[cur_state][new_state] == HNS_ROCE_CMD_2RST_QP)
 		return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2,
 					 HNS_ROCE_CMD_2RST_QP,
-					 HNS_ROCE_CMD_TIME_CLASS_A);
+					 HNS_ROCE_CMD_TIMEOUT_MSECS);
 
 	if (op[cur_state][new_state] == HNS_ROCE_CMD_2ERR_QP)
 		return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2,
 					 HNS_ROCE_CMD_2ERR_QP,
-					 HNS_ROCE_CMD_TIME_CLASS_A);
+					 HNS_ROCE_CMD_TIMEOUT_MSECS);
 
 	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
 	if (IS_ERR(mailbox))
@@ -1814,7 +2411,7 @@
 
 	ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0,
 				op[cur_state][new_state],
-				HNS_ROCE_CMD_TIME_CLASS_C);
+				HNS_ROCE_CMD_TIMEOUT_MSECS);
 
 	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
 	return ret;
@@ -2000,11 +2597,11 @@
 	}
 
 	/*
-	*Reset to init
-	*	Mandatory param:
-	*	IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS
-	*	Optional param: NA
-	*/
+	 * Reset to init
+	 *	Mandatory param:
+	 *	IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS
+	 *	Optional param: NA
+	 */
 	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
 		roce_set_field(context->qpc_bytes_4,
 			       QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M,
@@ -2172,24 +2769,14 @@
 			     QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S,
 			     hr_qp->sq_signal_bits);
 
-		for (port = 0; port < hr_dev->caps.num_ports; port++) {
-			smac = (u8 *)hr_dev->dev_addr[port];
-			dev_dbg(dev, "smac: %2x: %2x: %2x: %2x: %2x: %2x\n",
-				smac[0], smac[1], smac[2], smac[3], smac[4],
-				smac[5]);
-			if ((dmac[0] == smac[0]) && (dmac[1] == smac[1]) &&
-			    (dmac[2] == smac[2]) && (dmac[3] == smac[3]) &&
-			    (dmac[4] == smac[4]) && (dmac[5] == smac[5])) {
-				roce_set_bit(context->qpc_bytes_32,
-				    QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S,
-				    1);
-				break;
-			}
-		}
-
-		if (hr_dev->loop_idc == 0x1)
+		port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) :
+			hr_qp->port;
+		smac = (u8 *)hr_dev->dev_addr[port];
+		/* when dmac equals smac or loop_idc is 1, it should loopback */
+		if (ether_addr_equal_unaligned(dmac, smac) ||
+		    hr_dev->loop_idc == 0x1)
 			roce_set_bit(context->qpc_bytes_32,
-				QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1);
+			      QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1);
 
 		roce_set_bit(context->qpc_bytes_32,
 			     QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S,
@@ -2509,7 +3096,7 @@
 	/* Every status migrate must change state */
 	roce_set_field(context->qpc_bytes_144,
 		       QP_CONTEXT_QPC_BYTES_144_QP_STATE_M,
-		       QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, attr->qp_state);
+		       QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, new_state);
 
 	/* SW pass context to HW */
 	ret = hns_roce_v1_qp_modify(hr_dev, &hr_qp->mtt,
@@ -2522,9 +3109,9 @@
 	}
 
 	/*
-	* Use rst2init to instead of init2init with drv,
-	* need to hw to flash RQ HEAD by DB again
-	*/
+	 * Use rst2init to instead of init2init with drv,
+	 * need to hw to flash RQ HEAD by DB again
+	 */
 	if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
 		/* Memory barrier */
 		wmb();
@@ -2619,7 +3206,7 @@
 
 	ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, hr_qp->qpn, 0,
 				HNS_ROCE_CMD_QUERY_QP,
-				HNS_ROCE_CMD_TIME_CLASS_A);
+				HNS_ROCE_CMD_TIMEOUT_MSECS);
 	if (!ret)
 		memcpy(hr_context, mailbox->buf, sizeof(*hr_context));
 	else
@@ -2630,8 +3217,78 @@
 	return ret;
 }
 
-int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
-			 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+static int hns_roce_v1_q_sqp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+			     int qp_attr_mask,
+			     struct ib_qp_init_attr *qp_init_attr)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+	struct hns_roce_sqp_context context;
+	u32 addr;
+	/* Report QP attributes read from the per-port QP1C registers. */
+	mutex_lock(&hr_qp->mutex);
+
+	if (hr_qp->state == IB_QPS_RESET) {
+		qp_attr->qp_state = IB_QPS_RESET;
+		goto done;
+	}
+	/* NOTE(review): stride is sizeof() bytes, fields step by 1; verify */
+	addr = ROCEE_QP1C_CFG0_0_REG +
+		hr_qp->port * sizeof(struct hns_roce_sqp_context);
+	context.qp1c_bytes_4 = roce_read(hr_dev, addr);
+	context.sq_rq_bt_l = roce_read(hr_dev, addr + 1);
+	context.qp1c_bytes_12 = roce_read(hr_dev, addr + 2);
+	context.qp1c_bytes_16 = roce_read(hr_dev, addr + 3);
+	context.qp1c_bytes_20 = roce_read(hr_dev, addr + 4);
+	context.cur_rq_wqe_ba_l = roce_read(hr_dev, addr + 5);
+	context.qp1c_bytes_28 = roce_read(hr_dev, addr + 6);
+	context.qp1c_bytes_32 = roce_read(hr_dev, addr + 7);
+	context.cur_sq_wqe_ba_l = roce_read(hr_dev, addr + 8);
+	context.qp1c_bytes_40 = roce_read(hr_dev, addr + 9);
+	/* State and pkey index come from the registers; most others are fixed. */
+	hr_qp->state = roce_get_field(context.qp1c_bytes_4,
+				      QP1C_BYTES_4_QP_STATE_M,
+				      QP1C_BYTES_4_QP_STATE_S);
+	qp_attr->qp_state	= hr_qp->state;
+	qp_attr->path_mtu	= IB_MTU_256;
+	qp_attr->path_mig_state	= IB_MIG_ARMED;
+	qp_attr->qkey		= QKEY_VAL;
+	qp_attr->rq_psn		= 0;
+	qp_attr->sq_psn		= 0;
+	qp_attr->dest_qp_num	= 1;
+	qp_attr->qp_access_flags = 6;
+
+	qp_attr->pkey_index = roce_get_field(context.qp1c_bytes_20,
+					     QP1C_BYTES_20_PKEY_IDX_M,
+					     QP1C_BYTES_20_PKEY_IDX_S);
+	qp_attr->port_num = hr_qp->port + 1;
+	qp_attr->sq_draining = 0;
+	qp_attr->max_rd_atomic = 0;
+	qp_attr->max_dest_rd_atomic = 0;
+	qp_attr->min_rnr_timer = 0;
+	qp_attr->timeout = 0;
+	qp_attr->retry_cnt = 0;
+	qp_attr->rnr_retry = 0;
+	qp_attr->alt_timeout = 0;
+
+done:	/* reached directly for a QP in RESET; caps come from driver state */
+	qp_attr->cur_qp_state = qp_attr->qp_state;
+	qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
+	qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs;
+	qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
+	qp_attr->cap.max_send_sge = hr_qp->sq.max_gs;
+	qp_attr->cap.max_inline_data = 0;
+	qp_init_attr->cap = qp_attr->cap;
+	qp_init_attr->create_flags = 0;
+
+	mutex_unlock(&hr_qp->mutex);
+
+	return 0;
+}
+
+static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+			    int qp_attr_mask,
+			    struct ib_qp_init_attr *qp_init_attr)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
 	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
@@ -2725,9 +3382,7 @@
 	qp_attr->pkey_index = roce_get_field(context->qpc_bytes_12,
 			      QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M,
 			      QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S);
-	qp_attr->port_num = (u8)roce_get_field(context->qpc_bytes_156,
-			     QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M,
-			     QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S) + 1;
+	qp_attr->port_num = hr_qp->port + 1;
 	qp_attr->sq_draining = 0;
 	qp_attr->max_rd_atomic = roce_get_field(context->qpc_bytes_156,
 				 QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M,
@@ -2767,136 +3422,399 @@
 	return ret;
 }
 
-static void hns_roce_v1_destroy_qp_common(struct hns_roce_dev *hr_dev,
-					  struct hns_roce_qp *hr_qp,
-					  int is_user)
+int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+			 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
 {
-	u32 sdbinvcnt;
-	unsigned long end = 0;
-	u32 sdbinvcnt_val;
-	u32 sdbsendptr_val;
-	u32 sdbisusepr_val;
-	struct hns_roce_cq *send_cq, *recv_cq;
+	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+	/* doorbell_qpn <= 1 selects the register-based special-QP query. */
+	return hr_qp->doorbell_qpn <= 1 ?
+		hns_roce_v1_q_sqp(ibqp, qp_attr, qp_attr_mask, qp_init_attr) :
+		hns_roce_v1_q_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr);
+}
+
+static int check_qp_db_process_status(struct hns_roce_dev *hr_dev,
+				      struct hns_roce_qp *hr_qp,
+				      u32 sdb_issue_ptr,
+				      u32 *sdb_inv_cnt,
+				      u32 *wait_stage)
+{
 	struct device *dev = &hr_dev->pdev->dev;
+	u32 sdb_retry_cnt, old_retry;
+	u32 sdb_send_ptr, old_send;
+	u32 success_flags = 0;
+	u32 cur_cnt, old_cnt;
+	unsigned long end;
+	u32 send_ptr;
+	u32 inv_cnt;
+	u32 tsp_st;
 
-	if (hr_qp->ibqp.qp_type == IB_QPT_RC) {
-		if (hr_qp->state != IB_QPS_RESET) {
-			/*
-			* Set qp to ERR,
-			* waiting for hw complete processing all dbs
-			*/
-			if (hns_roce_v1_qp_modify(hr_dev, NULL,
-					to_hns_roce_state(
-						(enum ib_qp_state)hr_qp->state),
-						HNS_ROCE_QP_STATE_ERR, NULL,
-						hr_qp))
-				dev_err(dev, "modify QP %06lx to ERR failed.\n",
-					hr_qp->qpn);
+	if (*wait_stage > HNS_ROCE_V1_DB_STAGE2 ||
+	    *wait_stage < HNS_ROCE_V1_DB_STAGE1) {
+		dev_err(dev, "QP(0x%lx) db status wait stage(%d) error!\n",
+			hr_qp->qpn, *wait_stage);
+		return -EINVAL;
+	}
 
-			/* Record issued doorbell */
-			sdbisusepr_val = roce_read(hr_dev,
-					 ROCEE_SDB_ISSUE_PTR_REG);
-			/*
-			* Query db process status,
-			* until hw process completely
-			*/
-			end = msecs_to_jiffies(
-			      HNS_ROCE_QP_DESTROY_TIMEOUT_MSECS) + jiffies;
-			do {
-				sdbsendptr_val = roce_read(hr_dev,
+	/* One deadline bounds all polling done in this call (both stages) */
+	end = msecs_to_jiffies(HNS_ROCE_V1_CHECK_DB_TIMEOUT_MSECS) + jiffies;
+
+	if (*wait_stage == HNS_ROCE_V1_DB_STAGE1) {
+		/* Stage 1: wait for send ptr to reach the recorded issue ptr */
+		sdb_send_ptr = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG);
+		while (roce_hw_index_cmp_lt(sdb_send_ptr, sdb_issue_ptr,
+					    ROCEE_SDB_PTR_CMP_BITS)) {
+			if (!time_before(jiffies, end)) {
+				dev_dbg(dev, "QP(0x%lx) db process stage1 timeout. issue 0x%x send 0x%x.\n",
+					hr_qp->qpn, sdb_issue_ptr,
+					sdb_send_ptr);
+				return 0;	/* stage left unchanged */
+			}
+
+			msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS);
+			sdb_send_ptr = roce_read(hr_dev,
 						 ROCEE_SDB_SEND_PTR_REG);
-				if (!time_before(jiffies, end)) {
-					dev_err(dev, "destroy qp(0x%lx) timeout!!!",
-						hr_qp->qpn);
-					break;
-				}
-			} while ((short)(roce_get_field(sdbsendptr_val,
-					ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
-					ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) -
-				roce_get_field(sdbisusepr_val,
-					ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M,
-					ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S)
-				) < 0);
+		}
 
-			/* Get list pointer */
-			sdbinvcnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG);
+		if (roce_get_field(sdb_issue_ptr,
+				   ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M,
+				   ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) ==
+		    roce_get_field(sdb_send_ptr,
+				   ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
+				   ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)) {
+			old_send = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG);
+			old_retry = roce_read(hr_dev, ROCEE_SDB_RETRY_CNT_REG);
 
-			/* Query db's list status, until hw reversal */
 			do {
-				sdbinvcnt_val = roce_read(hr_dev,
-						ROCEE_SDB_INV_CNT_REG);
-				if (!time_before(jiffies, end)) {
-					dev_err(dev, "destroy qp(0x%lx) timeout!!!",
-						hr_qp->qpn);
-					dev_err(dev, "SdbInvCnt = 0x%x\n",
-						sdbinvcnt_val);
-					break;
+				tsp_st = roce_read(hr_dev, ROCEE_TSP_BP_ST_REG);
+				if (roce_get_bit(tsp_st,
+					ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S) == 1) {
+					*wait_stage = HNS_ROCE_V1_DB_WAIT_OK;
+					return 0;
 				}
-			} while ((short)(roce_get_field(sdbinvcnt_val,
-				  ROCEE_SDB_INV_CNT_SDB_INV_CNT_M,
-				  ROCEE_SDB_INV_CNT_SDB_INV_CNT_S) -
-				  (sdbinvcnt + SDB_INV_CNT_OFFSET)) < 0);
 
-			/* Modify qp to reset before destroying qp */
-			if (hns_roce_v1_qp_modify(hr_dev, NULL,
-					to_hns_roce_state(
-					(enum ib_qp_state)hr_qp->state),
-					HNS_ROCE_QP_STATE_RST, NULL, hr_qp))
-				dev_err(dev, "modify QP %06lx to RESET failed.\n",
-					hr_qp->qpn);
+				if (!time_before(jiffies, end)) {
+					dev_dbg(dev, "QP(0x%lx) db process stage1 timeout when send ptr equals issue ptr.\n"
+						     "issue 0x%x send 0x%x.\n",
+						hr_qp->qpn, sdb_issue_ptr,
+						sdb_send_ptr);
+					return 0;
+				}
+
+				msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS);
+				/* NOTE(review): tsp_st tested with CNT_CLR_CE bit? */
+				sdb_send_ptr = roce_read(hr_dev,
+							ROCEE_SDB_SEND_PTR_REG);
+				sdb_retry_cnt =	roce_read(hr_dev,
+						       ROCEE_SDB_RETRY_CNT_REG);
+				cur_cnt = roce_get_field(sdb_send_ptr,
+					ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
+					ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
+					roce_get_field(sdb_retry_cnt,
+					ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
+					ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S);
+				if (!roce_get_bit(tsp_st,
+					ROCEE_CNT_CLR_CE_CNT_CLR_CE_S)) {
+					old_cnt = roce_get_field(old_send,
+					ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
+					ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
+					roce_get_field(old_retry,
+					ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
+					ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S);
+					if (cur_cnt - old_cnt > SDB_ST_CMP_VAL)
+						success_flags = 1;
+				} else {
+					old_cnt = roce_get_field(old_send,
+					ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
+					ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S);
+					if (cur_cnt - old_cnt > SDB_ST_CMP_VAL)
+						success_flags = 1;
+					else {
+					    send_ptr = roce_get_field(old_send,
+					    ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
+					    ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
+					    roce_get_field(sdb_retry_cnt,
+					    ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
+					    ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S);
+					    roce_set_field(old_send,
+					    ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
+					    ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S,
+						send_ptr);
+					}
+				}
+			} while (!success_flags);
+		}
+
+		*wait_stage = HNS_ROCE_V1_DB_STAGE2;
+
+		/* Record the inv cnt that stage 2 measures progress against */
+		*sdb_inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG);
+		dev_dbg(dev, "QP(0x%lx) db process stage2. inv cnt = 0x%x.\n",
+			hr_qp->qpn, *sdb_inv_cnt);
+	}
+
+	if (*wait_stage == HNS_ROCE_V1_DB_STAGE2) {
+		/* Stage 2: wait for inv cnt to pass recorded value + offset */
+		inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG);
+		while (roce_hw_index_cmp_lt(inv_cnt,
+					    *sdb_inv_cnt + SDB_INV_CNT_OFFSET,
+					    ROCEE_SDB_CNT_CMP_BITS)) {
+			if (!time_before(jiffies, end)) {
+				dev_dbg(dev, "QP(0x%lx) db process stage2 timeout. inv cnt 0x%x.\n",
+					hr_qp->qpn, inv_cnt);
+				return 0;	/* stage left unchanged */
+			}
+
+			msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS);
+			inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG);
+		}
+
+		*wait_stage = HNS_ROCE_V1_DB_WAIT_OK;
+	}
+
+	return 0;
+}
+
+static int check_qp_reset_state(struct hns_roce_dev *hr_dev,
+				struct hns_roce_qp *hr_qp,
+				struct hns_roce_qp_work *qp_work_entry,
+				int *is_timeout)
+{
+	struct device *dev = &hr_dev->pdev->dev;
+	u32 sdb_issue_ptr;
+	int ret;
+
+	if (hr_qp->state != IB_QPS_RESET) {
+		/* Set qp to ERR, waiting for hw complete processing all dbs */
+		ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state,
+					    IB_QPS_ERR);
+		if (ret) {
+			dev_err(dev, "Modify QP(0x%lx) to ERR failed!\n",
+				hr_qp->qpn);
+			return ret;
+		}
+
+		/* Record issued doorbell */
+		sdb_issue_ptr = roce_read(hr_dev, ROCEE_SDB_ISSUE_PTR_REG);
+		qp_work_entry->sdb_issue_ptr = sdb_issue_ptr;
+		qp_work_entry->db_wait_stage = HNS_ROCE_V1_DB_STAGE1;
+
+		/* Query db process status, until hw process completely */
+		ret = check_qp_db_process_status(hr_dev, hr_qp, sdb_issue_ptr,
+						 &qp_work_entry->sdb_inv_cnt,
+						 &qp_work_entry->db_wait_stage);
+		if (ret) {
+			dev_err(dev, "Check QP(0x%lx) db process status failed!\n",
+				hr_qp->qpn);
+			return ret;
+		}
+		/* Not drained yet: have the caller defer the final teardown. */
+		if (qp_work_entry->db_wait_stage != HNS_ROCE_V1_DB_WAIT_OK) {
+			qp_work_entry->sche_cnt = 0;
+			*is_timeout = 1;
+			return 0;
+		}
+
+		/* Modify qp to reset before destroying qp */
+		ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state,
+					    IB_QPS_RESET);
+		if (ret) {
+			dev_err(dev, "Modify QP(0x%lx) to RST failed!\n",
+				hr_qp->qpn);
+			return ret;
 		}
 	}
 
-	send_cq = to_hr_cq(hr_qp->ibqp.send_cq);
-	recv_cq = to_hr_cq(hr_qp->ibqp.recv_cq);
+	return 0;
+}
 
-	hns_roce_lock_cqs(send_cq, recv_cq);
+/*
+ * Deferred QP destroy: re-poll DB status, requeue while busy, then free it.
+ */
+static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work)
+{
+	struct hns_roce_qp_work *qp_work_entry =
+		container_of(work, struct hns_roce_qp_work, work);
+	struct hns_roce_dev *hr_dev = to_hr_dev(qp_work_entry->ib_dev);
+	struct hns_roce_v1_priv *priv =
+		(struct hns_roce_v1_priv *)hr_dev->hw->priv;
+	struct hns_roce_qp *hr_qp = qp_work_entry->qp;
+	struct device *dev = &hr_dev->pdev->dev;
+	unsigned long qpn = hr_qp->qpn;	/* hr_qp is freed before the last log */
+	int ret;
 
-	if (!is_user) {
-		__hns_roce_v1_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ?
-				       to_hr_srq(hr_qp->ibqp.srq) : NULL);
-		if (send_cq != recv_cq)
-			__hns_roce_v1_cq_clean(send_cq, hr_qp->qpn, NULL);
+	dev_dbg(dev, "Schedule destroy QP(0x%lx) work.\n", hr_qp->qpn);
+
+	qp_work_entry->sche_cnt++;
+
+	/* Query db process status, until hw process completely */
+	ret = check_qp_db_process_status(hr_dev, hr_qp,
+					 qp_work_entry->sdb_issue_ptr,
+					 &qp_work_entry->sdb_inv_cnt,
+					 &qp_work_entry->db_wait_stage);
+	if (ret) {
+		dev_err(dev, "Check QP(0x%lx) db process status failed!\n",
+			hr_qp->qpn);
+		return;	/* NOTE(review): work entry and hr_qp stay allocated */
+	}
+
+	if (qp_work_entry->db_wait_stage != HNS_ROCE_V1_DB_WAIT_OK &&
+	    priv->des_qp.requeue_flag) {
+		queue_work(priv->des_qp.qp_wq, work);
+		return;
+	}
+
+	/* Modify qp to reset before destroying qp */
+	ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state,
+				    IB_QPS_RESET);
+	if (ret) {
+		dev_err(dev, "Modify QP(0x%lx) to RST failed!\n", hr_qp->qpn);
+		return;	/* NOTE(review): work entry and hr_qp stay allocated */
 	}
 
 	hns_roce_qp_remove(hr_dev, hr_qp);
-
-	hns_roce_unlock_cqs(send_cq, recv_cq);
-
 	hns_roce_qp_free(hr_dev, hr_qp);
 
-	/* Not special_QP, free their QPN */
-	if ((hr_qp->ibqp.qp_type == IB_QPT_RC) ||
-	    (hr_qp->ibqp.qp_type == IB_QPT_UC) ||
-	    (hr_qp->ibqp.qp_type == IB_QPT_UD))
+	if (hr_qp->ibqp.qp_type == IB_QPT_RC) {
+		/* RC QP, release QPN */
 		hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1);
+		kfree(hr_qp);
+	} else
+		kfree(hr_to_hr_sqp(hr_qp));
 
-	hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
+	kfree(qp_work_entry);
 
-	if (is_user) {
-		ib_umem_release(hr_qp->umem);
-	} else {
-		kfree(hr_qp->sq.wrid);
-		kfree(hr_qp->rq.wrid);
-		hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
-	}
+	dev_dbg(dev, "Accomplished destroy QP(0x%lx) work.\n", qpn);
 }
 
 int hns_roce_v1_destroy_qp(struct ib_qp *ibqp)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
 	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+	struct device *dev = &hr_dev->pdev->dev;
+	struct hns_roce_qp_work qp_work_entry;
+	struct hns_roce_qp_work *qp_work;
+	struct hns_roce_v1_priv *priv;
+	struct hns_roce_cq *send_cq, *recv_cq;
+	int is_user = !!ibqp->pd->uobject;
+	int is_timeout = 0;
+	int ret;
 
-	hns_roce_v1_destroy_qp_common(hr_dev, hr_qp, !!ibqp->pd->uobject);
+	ret = check_qp_reset_state(hr_dev, hr_qp, &qp_work_entry, &is_timeout);
+	if (ret) {
+		dev_err(dev, "QP reset state check failed(%d)!\n", ret);
+		return ret;
+	}
 
-	if (hr_qp->ibqp.qp_type == IB_QPT_GSI)
-		kfree(hr_to_hr_sqp(hr_qp));
-	else
-		kfree(hr_qp);
+	send_cq = to_hr_cq(hr_qp->ibqp.send_cq);
+	recv_cq = to_hr_cq(hr_qp->ibqp.recv_cq);
+
+	hns_roce_lock_cqs(send_cq, recv_cq);
+	if (!is_user) {
+		__hns_roce_v1_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ?
+				       to_hr_srq(hr_qp->ibqp.srq) : NULL);
+		if (send_cq != recv_cq)
+			__hns_roce_v1_cq_clean(send_cq, hr_qp->qpn, NULL);
+	}
+	hns_roce_unlock_cqs(send_cq, recv_cq);
+	/* DBs are done (or QP was in RESET): unregister the QP right away. */
+	if (!is_timeout) {
+		hns_roce_qp_remove(hr_dev, hr_qp);
+		hns_roce_qp_free(hr_dev, hr_qp);
+
+		/* RC QP, release QPN */
+		if (hr_qp->ibqp.qp_type == IB_QPT_RC)
+			hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1);
+	}
+	/* Buffers are released on both paths, even if teardown is deferred. */
+	hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
+
+	if (is_user)
+		ib_umem_release(hr_qp->umem);
+	else {
+		kfree(hr_qp->sq.wrid);
+		kfree(hr_qp->rq.wrid);
+
+		hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
+	}
+
+	if (!is_timeout) {
+		if (hr_qp->ibqp.qp_type == IB_QPT_RC)
+			kfree(hr_qp);
+		else
+			kfree(hr_to_hr_sqp(hr_qp));
+	} else {
+		qp_work = kzalloc(sizeof(*qp_work), GFP_KERNEL);
+		if (!qp_work)
+			return -ENOMEM;	/* NOTE(review): hr_qp is leaked here */
+
+		INIT_WORK(&qp_work->work, hns_roce_v1_destroy_qp_work_fn);
+		qp_work->ib_dev	= &hr_dev->ib_dev;
+		qp_work->qp		= hr_qp;
+		qp_work->db_wait_stage	= qp_work_entry.db_wait_stage;
+		qp_work->sdb_issue_ptr	= qp_work_entry.sdb_issue_ptr;
+		qp_work->sdb_inv_cnt	= qp_work_entry.sdb_inv_cnt;
+		qp_work->sche_cnt	= qp_work_entry.sche_cnt;
+
+		priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+		queue_work(priv->des_qp.qp_wq, &qp_work->work);
+		dev_dbg(dev, "Begin destroy QP(0x%lx) work.\n", hr_qp->qpn);
+	}
 
 	return 0;
 }
 
+int hns_roce_v1_destroy_cq(struct ib_cq *ibcq)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device);
+	struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
+	struct device *dev = &hr_dev->pdev->dev;
+	u32 start_cnt, now_cnt;
+	u32 buf_bytes;
+	int polls;
+	int ret = 0;
+
+	hns_roce_free_cq(hr_dev, hr_cq);
+
+	/*
+	 * The CQ buffer must not be released while outstanding CQEs may still
+	 * be written.  Poll until the CQE write-command status reads empty or
+	 * the CQE counter has advanced by HNS_ROCE_MIN_CQE_CNT, sleeping
+	 * between polls and giving up after a bounded number of rounds.
+	 */
+	start_cnt = roce_read(hr_dev, ROCEE_SCAEP_WR_CQE_CNT);
+	for (polls = 0; ; polls++) {
+		if (roce_read(hr_dev, ROCEE_CAEP_CQE_WCMD_EMPTY) &
+		    HNS_ROCE_CQE_WCMD_EMPTY_BIT)
+			break;
+
+		now_cnt = roce_read(hr_dev, ROCEE_SCAEP_WR_CQE_CNT);
+		if ((now_cnt - start_cnt) >= HNS_ROCE_MIN_CQE_CNT)
+			break;
+
+		msleep(HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS);
+		if (polls > HNS_ROCE_MAX_FREE_CQ_WAIT_CNT) {
+			ret = -ETIMEDOUT;
+			dev_warn(dev, "Destroy cq 0x%lx timeout!\n",
+				hr_cq->cqn);
+			break;
+		}
+	}
+
+	hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
+
+	if (!ibcq->uobject) {
+		/* No user object: release the driver-side CQE buffer. */
+		buf_bytes = (ibcq->cqe + 1) * hr_dev->caps.cq_entry_sz;
+		hns_roce_buf_free(hr_dev, buf_bytes, &hr_cq->hr_buf.hr_buf);
+	} else {
+		ib_umem_release(hr_cq->umem);
+	}
+
+	kfree(hr_cq);
+
+	return ret;
+}
+
 struct hns_roce_v1_priv hr_v1_priv;
 
 struct hns_roce_hw hns_roce_hw_v1 = {
@@ -2917,5 +3835,7 @@
 	.post_recv = hns_roce_v1_post_recv,
 	.req_notify_cq = hns_roce_v1_req_notify_cq,
 	.poll_cq = hns_roce_v1_poll_cq,
+	.dereg_mr = hns_roce_v1_dereg_mr,
+	.destroy_cq = hns_roce_v1_destroy_cq,
 	.priv = &hr_v1_priv,
 };
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index 539b0a3b..b213b5e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -58,6 +58,7 @@
 #define HNS_ROCE_V1_PHY_UAR_NUM				8
 
 #define HNS_ROCE_V1_GID_NUM				16
+#define HNS_ROCE_V1_RESV_QP				8
 
 #define HNS_ROCE_V1_NUM_COMP_EQE			0x8000
 #define HNS_ROCE_V1_NUM_ASYNC_EQE			0x400
@@ -102,8 +103,22 @@
 #define HNS_ROCE_V1_EXT_ODB_ALFUL	\
 	(HNS_ROCE_V1_EXT_ODB_DEPTH - HNS_ROCE_V1_DB_RSVD)
 
+#define HNS_ROCE_V1_DB_WAIT_OK				0
+#define HNS_ROCE_V1_DB_STAGE1				1
+#define HNS_ROCE_V1_DB_STAGE2				2
+#define HNS_ROCE_V1_CHECK_DB_TIMEOUT_MSECS		10000
+#define HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS		20
+#define HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS		50000
+#define HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS	10000
+#define HNS_ROCE_V1_FREE_MR_WAIT_VALUE			5
+#define HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE		20
+
 #define HNS_ROCE_BT_RSV_BUF_SIZE			(1 << 17)
 
+#define HNS_ROCE_V1_TPTR_ENTRY_SIZE			2
+#define HNS_ROCE_V1_TPTR_BUF_SIZE	\
+	(HNS_ROCE_V1_TPTR_ENTRY_SIZE * HNS_ROCE_V1_MAX_CQ_NUM)
+
 #define HNS_ROCE_ODB_POLL_MODE				0
 
 #define HNS_ROCE_SDB_NORMAL_MODE			0
@@ -140,6 +155,7 @@
 #define SQ_PSN_SHIFT					8
 #define QKEY_VAL					0x80010000
 #define SDB_INV_CNT_OFFSET				8
+#define SDB_ST_CMP_VAL					8
 
 struct hns_roce_cq_context {
 	u32 cqc_byte_4;
@@ -436,6 +452,8 @@
 #define UD_SEND_WQE_U32_8_DMAC_5_M   \
 	(((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_5_S)
 
+#define UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S 22
+
 #define UD_SEND_WQE_U32_8_OPERATION_TYPE_S 16
 #define UD_SEND_WQE_U32_8_OPERATION_TYPE_M   \
 	(((1UL << 4) - 1) << UD_SEND_WQE_U32_8_OPERATION_TYPE_S)
@@ -480,13 +498,17 @@
 	u32 qp1c_bytes_12;
 	u32 qp1c_bytes_16;
 	u32 qp1c_bytes_20;
-	u32 qp1c_bytes_28;
 	u32 cur_rq_wqe_ba_l;
+	u32 qp1c_bytes_28;
 	u32 qp1c_bytes_32;
 	u32 cur_sq_wqe_ba_l;
 	u32 qp1c_bytes_40;
 };
 
+#define QP1C_BYTES_4_QP_STATE_S 0
+#define QP1C_BYTES_4_QP_STATE_M   \
+	(((1UL << 3) - 1) << QP1C_BYTES_4_QP_STATE_S)
+
 #define QP1C_BYTES_4_SQ_WQE_SHIFT_S 8
 #define QP1C_BYTES_4_SQ_WQE_SHIFT_M   \
 	(((1UL << 4) - 1) << QP1C_BYTES_4_SQ_WQE_SHIFT_S)
@@ -952,6 +974,10 @@
 #define SQ_DOORBELL_U32_4_SQ_HEAD_M   \
 	(((1UL << 15) - 1) << SQ_DOORBELL_U32_4_SQ_HEAD_S)
 
+#define SQ_DOORBELL_U32_4_SL_S 16
+#define SQ_DOORBELL_U32_4_SL_M   \
+	(((1UL << 2) - 1) << SQ_DOORBELL_U32_4_SL_S)
+
 #define SQ_DOORBELL_U32_4_PORT_S 18
 #define SQ_DOORBELL_U32_4_PORT_M  (((1UL << 3) - 1) << SQ_DOORBELL_U32_4_PORT_S)
 
@@ -979,12 +1005,58 @@
 	struct hns_roce_buf_list cqc_buf;
 };
 
+struct hns_roce_tptr_table {
+	struct hns_roce_buf_list tptr_buf;	/* tptr slots, indexed by cqn */
+};
+
+struct hns_roce_qp_work {
+	struct	work_struct work;	/* runs hns_roce_v1_destroy_qp_work_fn */
+	struct	ib_device *ib_dev;
+	struct	hns_roce_qp *qp;	/* QP whose teardown was deferred */
+	u32	db_wait_stage;		/* HNS_ROCE_V1_DB_STAGE1/STAGE2/WAIT_OK */
+	u32	sdb_issue_ptr;		/* issue ptr read after QP->ERR */
+	u32	sdb_inv_cnt;		/* inv cnt read at end of stage 1 */
+	u32	sche_cnt;		/* times the work function has run */
+};
+
+struct hns_roce_des_qp {
+	struct workqueue_struct	*qp_wq;	/* runs deferred destroy-QP work */
+	int	requeue_flag;		/* cleared on free to stop requeueing */
+};
+
+struct hns_roce_mr_free_work {
+	struct	work_struct work;
+	struct	ib_device *ib_dev;
+	struct	completion *comp;
+	int	comp_flag;	/* NOTE(review): users not visible here; confirm */
+	void	*mr;		/* untyped MR pointer; presumably a hns_roce_mr */
+};
+
+struct hns_roce_recreate_lp_qp_work {
+	struct	work_struct work;
+	struct	ib_device *ib_dev;
+	struct	completion *comp;
+	int	comp_flag;	/* NOTE(review): users not visible here; confirm */
+};
+
+struct hns_roce_free_mr {
+	struct workqueue_struct *free_mr_wq;
+	struct hns_roce_qp *mr_free_qp[HNS_ROCE_V1_RESV_QP];	/* 8 slots */
+	struct hns_roce_cq *mr_free_cq;
+	struct hns_roce_pd *mr_free_pd;
+};
+
 struct hns_roce_v1_priv {
 	struct hns_roce_db_table  db_table;
 	struct hns_roce_raq_table raq_table;
 	struct hns_roce_bt_table  bt_table;
+	struct hns_roce_tptr_table tptr_table;	/* CQ tptr buffer */
+	struct hns_roce_des_qp des_qp;		/* deferred QP destroy state */
+	struct hns_roce_free_mr free_mr;
 };
 
 int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset);
+int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int hns_roce_v1_destroy_qp(struct ib_qp *ibqp);
 
 #endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 764e35a..4953d9c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -35,52 +35,13 @@
 #include <rdma/ib_addr.h>
 #include <rdma/ib_smi.h>
 #include <rdma/ib_user_verbs.h>
+#include <rdma/ib_cache.h>
 #include "hns_roce_common.h"
 #include "hns_roce_device.h"
-#include "hns_roce_user.h"
+#include <rdma/hns-abi.h>
 #include "hns_roce_hem.h"
 
 /**
- * hns_roce_addrconf_ifid_eui48 - Get default gid.
- * @eui: eui.
- * @vlan_id:  gid
- * @dev:  net device
- * Description:
- *    MAC convert to GID
- *        gid[0..7] = fe80 0000 0000 0000
- *        gid[8] = mac[0] ^ 2
- *        gid[9] = mac[1]
- *        gid[10] = mac[2]
- *        gid[11] = ff        (VLAN ID high byte (4 MS bits))
- *        gid[12] = fe        (VLAN ID low byte)
- *        gid[13] = mac[3]
- *        gid[14] = mac[4]
- *        gid[15] = mac[5]
- */
-static void hns_roce_addrconf_ifid_eui48(u8 *eui, u16 vlan_id,
-					 struct net_device *dev)
-{
-	memcpy(eui, dev->dev_addr, 3);
-	memcpy(eui + 5, dev->dev_addr + 3, 3);
-	if (vlan_id < 0x1000) {
-		eui[3] = vlan_id >> 8;
-		eui[4] = vlan_id & 0xff;
-	} else {
-		eui[3] = 0xff;
-		eui[4] = 0xfe;
-	}
-	eui[0] ^= 2;
-}
-
-static void hns_roce_make_default_gid(struct net_device *dev, union ib_gid *gid)
-{
-	memset(gid, 0, sizeof(*gid));
-	gid->raw[0] = 0xFE;
-	gid->raw[1] = 0x80;
-	hns_roce_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev);
-}
-
-/**
  * hns_get_gid_index - Get gid index.
  * @hr_dev: pointer to structure hns_roce_dev.
  * @port:  port, value range: 0 ~ MAX
@@ -96,30 +57,6 @@
 	return gid_index * hr_dev->caps.num_ports + port;
 }
 
-static int hns_roce_set_gid(struct hns_roce_dev *hr_dev, u8 port, int gid_index,
-		     union ib_gid *gid)
-{
-	struct device *dev = &hr_dev->pdev->dev;
-	u8 gid_idx = 0;
-
-	if (gid_index >= hr_dev->caps.gid_table_len[port]) {
-		dev_err(dev, "gid_index %d illegal, port %d gid range: 0~%d\n",
-			gid_index, port, hr_dev->caps.gid_table_len[port] - 1);
-		return -EINVAL;
-	}
-
-	gid_idx = hns_get_gid_index(hr_dev, port, gid_index);
-
-	if (!memcmp(gid, &hr_dev->iboe.gid_table[gid_idx], sizeof(*gid)))
-		return -EINVAL;
-
-	memcpy(&hr_dev->iboe.gid_table[gid_idx], gid, sizeof(*gid));
-
-	hr_dev->hw->set_gid(hr_dev, port, gid_index, gid);
-
-	return 0;
-}
-
 static void hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr)
 {
 	u8 phy_port;
@@ -135,27 +72,44 @@
 	hr_dev->hw->set_mac(hr_dev, phy_port, addr);
 }
 
-static void hns_roce_set_mtu(struct hns_roce_dev *hr_dev, u8 port, int mtu)
+static int hns_roce_add_gid(struct ib_device *device, u8 port_num,
+			    unsigned int index, const union ib_gid *gid,
+			    const struct ib_gid_attr *attr, void **context)
 {
-	u8 phy_port = hr_dev->iboe.phy_port[port];
-	enum ib_mtu tmp;
+	struct hns_roce_dev *hr_dev = to_hr_dev(device);
+	u8 port = port_num - 1;
+	unsigned long flags;
 
-	tmp = iboe_get_mtu(mtu);
-	if (!tmp)
-		tmp = IB_MTU_256;
+	if (port >= hr_dev->caps.num_ports)
+		return -EINVAL;
 
-	hr_dev->hw->set_mtu(hr_dev, phy_port, tmp);
+	spin_lock_irqsave(&hr_dev->iboe.lock, flags);
+
+	hr_dev->hw->set_gid(hr_dev, port, index, (union ib_gid *)gid);
+
+	spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
+
+	return 0;
 }
 
-static void hns_roce_update_gids(struct hns_roce_dev *hr_dev, int port)
+static int hns_roce_del_gid(struct ib_device *device, u8 port_num,
+			    unsigned int index, void **context)
 {
-	struct ib_event event;
+	struct hns_roce_dev *hr_dev = to_hr_dev(device);
+	union ib_gid zgid = { {0} };
+	u8 port = port_num - 1;
+	unsigned long flags;
 
-	/* Refresh gid in ib_cache */
-	event.device = &hr_dev->ib_dev;
-	event.element.port_num = port + 1;
-	event.event = IB_EVENT_GID_CHANGE;
-	ib_dispatch_event(&event);
+	if (port >= hr_dev->caps.num_ports)
+		return -EINVAL;
+
+	spin_lock_irqsave(&hr_dev->iboe.lock, flags);
+
+	hr_dev->hw->set_gid(hr_dev, port, index, &zgid);
+
+	spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
+
+	return 0;
 }
 
 static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port,
@@ -163,9 +117,6 @@
 {
 	struct device *dev = &hr_dev->pdev->dev;
 	struct net_device *netdev;
-	unsigned long flags;
-	union ib_gid gid;
-	int ret = 0;
 
 	netdev = hr_dev->iboe.netdevs[port];
 	if (!netdev) {
@@ -173,7 +124,7 @@
 		return -ENODEV;
 	}
 
-	spin_lock_irqsave(&hr_dev->iboe.lock, flags);
+	spin_lock_bh(&hr_dev->iboe.lock);
 
 	switch (event) {
 	case NETDEV_UP:
@@ -181,23 +132,19 @@
 	case NETDEV_REGISTER:
 	case NETDEV_CHANGEADDR:
 		hns_roce_set_mac(hr_dev, port, netdev->dev_addr);
-		hns_roce_make_default_gid(netdev, &gid);
-		ret = hns_roce_set_gid(hr_dev, port, 0, &gid);
-		if (!ret)
-			hns_roce_update_gids(hr_dev, port);
 		break;
 	case NETDEV_DOWN:
 		/*
-		* In v1 engine, only support all ports closed together.
-		*/
+		 * The v1 engine only supports closing all ports together.
+		 */
 		break;
 	default:
 		dev_dbg(dev, "NETDEV event = 0x%x!\n", (u32)(event));
 		break;
 	}
 
-	spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
-	return ret;
+	spin_unlock_bh(&hr_dev->iboe.lock);
+	return 0;
 }
 
 static int hns_roce_netdev_event(struct notifier_block *self,
@@ -224,118 +171,17 @@
 	return NOTIFY_DONE;
 }
 
-static void hns_roce_addr_event(int event, struct net_device *event_netdev,
-				struct hns_roce_dev *hr_dev, union ib_gid *gid)
+static int hns_roce_setup_mtu_mac(struct hns_roce_dev *hr_dev)
 {
-	struct hns_roce_ib_iboe *iboe = NULL;
-	int gid_table_len = 0;
-	unsigned long flags;
-	union ib_gid zgid;
-	u8 gid_idx = 0;
-	u8 port = 0;
-	int i = 0;
-	int free;
-	struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ?
-				      rdma_vlan_dev_real_dev(event_netdev) :
-				      event_netdev;
-
-	if (event != NETDEV_UP && event != NETDEV_DOWN)
-		return;
-
-	iboe = &hr_dev->iboe;
-	while (port < hr_dev->caps.num_ports) {
-		if (real_dev == iboe->netdevs[port])
-			break;
-		port++;
-	}
-
-	if (port >= hr_dev->caps.num_ports) {
-		dev_dbg(&hr_dev->pdev->dev, "can't find netdev\n");
-		return;
-	}
-
-	memset(zgid.raw, 0, sizeof(zgid.raw));
-	free = -1;
-	gid_table_len = hr_dev->caps.gid_table_len[port];
-
-	spin_lock_irqsave(&hr_dev->iboe.lock, flags);
-
-	for (i = 0; i < gid_table_len; i++) {
-		gid_idx = hns_get_gid_index(hr_dev, port, i);
-		if (!memcmp(gid->raw, iboe->gid_table[gid_idx].raw,
-			    sizeof(gid->raw)))
-			break;
-		if (free < 0 && !memcmp(zgid.raw,
-			iboe->gid_table[gid_idx].raw, sizeof(zgid.raw)))
-			free = i;
-	}
-
-	if (i >= gid_table_len) {
-		if (free < 0) {
-			spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
-			dev_dbg(&hr_dev->pdev->dev,
-				"gid_index overflow, port(%d)\n", port);
-			return;
-		}
-		if (!hns_roce_set_gid(hr_dev, port, free, gid))
-			hns_roce_update_gids(hr_dev, port);
-	} else if (event == NETDEV_DOWN) {
-		if (!hns_roce_set_gid(hr_dev, port, i, &zgid))
-			hns_roce_update_gids(hr_dev, port);
-	}
-
-	spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
-}
-
-static int hns_roce_inet_event(struct notifier_block *self, unsigned long event,
-			       void *ptr)
-{
-	struct in_ifaddr *ifa = ptr;
-	struct hns_roce_dev *hr_dev;
-	struct net_device *dev = ifa->ifa_dev->dev;
-	union ib_gid gid;
-
-	ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid);
-
-	hr_dev = container_of(self, struct hns_roce_dev, iboe.nb_inet);
-
-	hns_roce_addr_event(event, dev, hr_dev, &gid);
-
-	return NOTIFY_DONE;
-}
-
-static int hns_roce_setup_mtu_gids(struct hns_roce_dev *hr_dev)
-{
-	struct in_ifaddr *ifa_list = NULL;
-	union ib_gid gid = {{0} };
-	u32 ipaddr = 0;
-	int index = 0;
-	int ret = 0;
-	u8 i = 0;
+	u8 i;
 
 	for (i = 0; i < hr_dev->caps.num_ports; i++) {
-		hns_roce_set_mtu(hr_dev, i,
-				 ib_mtu_enum_to_int(hr_dev->caps.max_mtu));
+		hr_dev->hw->set_mtu(hr_dev, hr_dev->iboe.phy_port[i],
+				    hr_dev->caps.max_mtu);
 		hns_roce_set_mac(hr_dev, i, hr_dev->iboe.netdevs[i]->dev_addr);
-
-		if (hr_dev->iboe.netdevs[i]->ip_ptr) {
-			ifa_list = hr_dev->iboe.netdevs[i]->ip_ptr->ifa_list;
-			index = 1;
-			while (ifa_list) {
-				ipaddr = ifa_list->ifa_address;
-				ipv6_addr_set_v4mapped(ipaddr,
-						       (struct in6_addr *)&gid);
-				ret = hns_roce_set_gid(hr_dev, i, index, &gid);
-				if (ret)
-					break;
-				index++;
-				ifa_list = ifa_list->ifa_next;
-			}
-			hns_roce_update_gids(hr_dev, i);
-		}
 	}
 
-	return ret;
+	return 0;
 }
 
 static int hns_roce_query_device(struct ib_device *ib_dev,
@@ -444,31 +290,6 @@
 static int hns_roce_query_gid(struct ib_device *ib_dev, u8 port_num, int index,
 			      union ib_gid *gid)
 {
-	struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
-	struct device *dev = &hr_dev->pdev->dev;
-	u8 gid_idx = 0;
-	u8 port;
-
-	if (port_num < 1 || port_num > hr_dev->caps.num_ports ||
-	    index >= hr_dev->caps.gid_table_len[port_num - 1]) {
-		dev_err(dev,
-			"port_num %d index %d illegal! correct range: port_num 1~%d index 0~%d!\n",
-			port_num, index, hr_dev->caps.num_ports,
-			hr_dev->caps.gid_table_len[port_num - 1] - 1);
-		return -EINVAL;
-	}
-
-	port = port_num - 1;
-	gid_idx = hns_get_gid_index(hr_dev, port, index);
-	if (gid_idx >= HNS_ROCE_MAX_GID_NUM) {
-		dev_err(dev, "port_num %d index %d illegal! total gid num %d!\n",
-			port_num, index, HNS_ROCE_MAX_GID_NUM);
-		return -EINVAL;
-	}
-
-	memcpy(gid->raw, hr_dev->iboe.gid_table[gid_idx].raw,
-	       HNS_ROCE_GID_SIZE);
-
 	return 0;
 }
 
@@ -549,6 +370,8 @@
 static int hns_roce_mmap(struct ib_ucontext *context,
 			 struct vm_area_struct *vma)
 {
+	struct hns_roce_dev *hr_dev = to_hr_dev(context->device);
+
 	if (((vma->vm_end - vma->vm_start) % PAGE_SIZE) != 0)
 		return -EINVAL;
 
@@ -558,10 +381,15 @@
 				       to_hr_ucontext(context)->uar.pfn,
 				       PAGE_SIZE, vma->vm_page_prot))
 			return -EAGAIN;
-
-	} else {
+	} else if (vma->vm_pgoff == 1 && hr_dev->hw_rev == HNS_ROCE_HW_VER1) {
+		/* vm_pgoff 1 selects the TPTR region (hw v1 only) */
+		if (io_remap_pfn_range(vma, vma->vm_start,
+				       hr_dev->tptr_dma_addr >> PAGE_SHIFT,
+				       hr_dev->tptr_size,
+				       vma->vm_page_prot))
+			return -EAGAIN;
+	} else
 		return -EINVAL;
-	}
 
 	return 0;
 }
@@ -605,7 +433,7 @@
 	spin_lock_init(&iboe->lock);
 
 	ib_dev = &hr_dev->ib_dev;
-	strlcpy(ib_dev->name, "hisi_%d", IB_DEVICE_NAME_MAX);
+	strlcpy(ib_dev->name, "hns_%d", IB_DEVICE_NAME_MAX);
 
 	ib_dev->owner			= THIS_MODULE;
 	ib_dev->node_type		= RDMA_NODE_IB_CA;
@@ -639,6 +467,8 @@
 	ib_dev->get_link_layer		= hns_roce_get_link_layer;
 	ib_dev->get_netdev		= hns_roce_get_netdev;
 	ib_dev->query_gid		= hns_roce_query_gid;
+	ib_dev->add_gid			= hns_roce_add_gid;
+	ib_dev->del_gid			= hns_roce_del_gid;
 	ib_dev->query_pkey		= hns_roce_query_pkey;
 	ib_dev->alloc_ucontext		= hns_roce_alloc_ucontext;
 	ib_dev->dealloc_ucontext	= hns_roce_dealloc_ucontext;
@@ -681,32 +511,22 @@
 		return ret;
 	}
 
-	ret = hns_roce_setup_mtu_gids(hr_dev);
+	ret = hns_roce_setup_mtu_mac(hr_dev);
 	if (ret) {
-		dev_err(dev, "roce_setup_mtu_gids failed!\n");
-		goto error_failed_setup_mtu_gids;
+		dev_err(dev, "setup_mtu_mac failed!\n");
+		goto error_failed_setup_mtu_mac;
 	}
 
 	iboe->nb.notifier_call = hns_roce_netdev_event;
 	ret = register_netdevice_notifier(&iboe->nb);
 	if (ret) {
 		dev_err(dev, "register_netdevice_notifier failed!\n");
-		goto error_failed_setup_mtu_gids;
-	}
-
-	iboe->nb_inet.notifier_call = hns_roce_inet_event;
-	ret = register_inetaddr_notifier(&iboe->nb_inet);
-	if (ret) {
-		dev_err(dev, "register inet addr notifier failed!\n");
-		goto error_failed_register_inetaddr_notifier;
+		goto error_failed_setup_mtu_mac;
 	}
 
 	return 0;
 
-error_failed_register_inetaddr_notifier:
-	unregister_netdevice_notifier(&iboe->nb);
-
-error_failed_setup_mtu_gids:
+error_failed_setup_mtu_mac:
 	ib_unregister_device(ib_dev);
 
 	return ret;
@@ -940,10 +760,10 @@
 }
 
 /**
-* hns_roce_setup_hca - setup host channel adapter
-* @hr_dev: pointer to hns roce device
-* Return : int
-*/
+ * hns_roce_setup_hca - setup host channel adapter
+ * @hr_dev: pointer to hns roce device
+ * Return : int
+ */
 static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
 {
 	int ret;
@@ -1008,11 +828,11 @@
 }
 
 /**
-* hns_roce_probe - RoCE driver entrance
-* @pdev: pointer to platform device
-* Return : int
-*
-*/
+ * hns_roce_probe - RoCE driver entrance
+ * @pdev: pointer to platform device
+ * Return : int
+ *
+ */
 static int hns_roce_probe(struct platform_device *pdev)
 {
 	int ret;
@@ -1023,9 +843,6 @@
 	if (!hr_dev)
 		return -ENOMEM;
 
-	memset((u8 *)hr_dev + sizeof(struct ib_device), 0,
-		sizeof(struct hns_roce_dev) - sizeof(struct ib_device));
-
 	hr_dev->pdev = pdev;
 	platform_set_drvdata(pdev, hr_dev);
 
@@ -1125,9 +942,9 @@
 }
 
 /**
-* hns_roce_remove - remove RoCE device
-* @pdev: pointer to platform device
-*/
+ * hns_roce_remove - remove RoCE device
+ * @pdev: pointer to platform device
+ */
 static int hns_roce_remove(struct platform_device *pdev)
 {
 	struct hns_roce_dev *hr_dev = platform_get_drvdata(pdev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index fb87883..4139abe 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -42,7 +42,7 @@
 	return (u32)(ind >> 24) | (ind << 8);
 }
 
-static unsigned long key_to_hw_index(u32 key)
+unsigned long key_to_hw_index(u32 key)
 {
 	return (key << 24) | (key >> 8);
 }
@@ -53,16 +53,16 @@
 {
 	return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0,
 				 HNS_ROCE_CMD_SW2HW_MPT,
-				 HNS_ROCE_CMD_TIME_CLASS_B);
+				 HNS_ROCE_CMD_TIMEOUT_MSECS);
 }
 
-static int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
+int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
 			      struct hns_roce_cmd_mailbox *mailbox,
 			      unsigned long mpt_index)
 {
 	return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0,
 				 mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT,
-				 HNS_ROCE_CMD_TIME_CLASS_B);
+				 HNS_ROCE_CMD_TIMEOUT_MSECS);
 }
 
 static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order,
@@ -137,11 +137,13 @@
 
 	for (i = 0; i <= buddy->max_order; ++i) {
 		s = BITS_TO_LONGS(1 << (buddy->max_order - i));
-		buddy->bits[i] = kmalloc_array(s, sizeof(long), GFP_KERNEL);
-		if (!buddy->bits[i])
-			goto err_out_free;
-
-		bitmap_zero(buddy->bits[i], 1 << (buddy->max_order - i));
+		buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL |
+					 __GFP_NOWARN);
+		if (!buddy->bits[i]) {
+			buddy->bits[i] = vzalloc(s * sizeof(long));
+			if (!buddy->bits[i])
+				goto err_out_free;
+		}
 	}
 
 	set_bit(0, buddy->bits[buddy->max_order]);
@@ -151,7 +153,7 @@
 
 err_out_free:
 	for (i = 0; i <= buddy->max_order; ++i)
-		kfree(buddy->bits[i]);
+		kvfree(buddy->bits[i]);
 
 err_out:
 	kfree(buddy->bits);
@@ -164,7 +166,7 @@
 	int i;
 
 	for (i = 0; i <= buddy->max_order; ++i)
-		kfree(buddy->bits[i]);
+		kvfree(buddy->bits[i]);
 
 	kfree(buddy->bits);
 	kfree(buddy->num_free);
@@ -287,7 +289,7 @@
 	}
 
 	hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
-			     key_to_hw_index(mr->key));
+			     key_to_hw_index(mr->key), BITMAP_NO_RR);
 }
 
 static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
@@ -605,13 +607,20 @@
 
 int hns_roce_dereg_mr(struct ib_mr *ibmr)
 {
+	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
 	struct hns_roce_mr *mr = to_hr_mr(ibmr);
+	int ret = 0;
 
-	hns_roce_mr_free(to_hr_dev(ibmr->device), mr);
-	if (mr->umem)
-		ib_umem_release(mr->umem);
+	if (hr_dev->hw->dereg_mr) {
+		ret = hr_dev->hw->dereg_mr(hr_dev, mr);
+	} else {
+		hns_roce_mr_free(hr_dev, mr);
 
-	kfree(mr);
+		if (mr->umem)
+			ib_umem_release(mr->umem);
 
-	return 0;
+		kfree(mr);
+	}
+
+	return ret;
 }
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index 05db7d5..a64500f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -40,7 +40,7 @@
 
 static void hns_roce_pd_free(struct hns_roce_dev *hr_dev, unsigned long pdn)
 {
-	hns_roce_bitmap_free(&hr_dev->pd_bitmap, pdn);
+	hns_roce_bitmap_free(&hr_dev->pd_bitmap, pdn, BITMAP_NO_RR);
 }
 
 int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev)
@@ -121,7 +121,8 @@
 
 void hns_roce_uar_free(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
 {
-	hns_roce_bitmap_free(&hr_dev->uar_table.bitmap, uar->index);
+	hns_roce_bitmap_free(&hr_dev->uar_table.bitmap, uar->index,
+			     BITMAP_NO_RR);
 }
 
 int hns_roce_init_uar_table(struct hns_roce_dev *hr_dev)
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index e86dd8d..f036f32 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -37,7 +37,7 @@
 #include "hns_roce_common.h"
 #include "hns_roce_device.h"
 #include "hns_roce_hem.h"
-#include "hns_roce_user.h"
+#include <rdma/hns-abi.h>
 
 #define SQP_NUM				(2 * HNS_ROCE_MAX_PORTS)
 
@@ -250,7 +250,7 @@
 	if (base_qpn < SQP_NUM)
 		return;
 
-	hns_roce_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt);
+	hns_roce_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt, BITMAP_RR);
 }
 
 static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev,
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index 8ec09e4..da2eb5a 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -112,9 +112,12 @@
 #define I40IW_DRV_OPT_MCAST_LOGPORT_MAP    0x00000800
 
 #define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types)
-#define IW_CFG_FPM_QP_COUNT		32768
-#define I40IW_MAX_PAGES_PER_FMR		512
-#define I40IW_MIN_PAGES_PER_FMR		1
+#define IW_CFG_FPM_QP_COUNT               32768
+#define I40IW_MAX_PAGES_PER_FMR           512
+#define I40IW_MIN_PAGES_PER_FMR           1
+#define I40IW_CQP_COMPL_RQ_WQE_FLUSHED    2
+#define I40IW_CQP_COMPL_SQ_WQE_FLUSHED    3
+#define I40IW_CQP_COMPL_RQ_SQ_WQE_FLUSHED 4
 
 #define I40IW_MTU_TO_MSS		40
 #define I40IW_DEFAULT_MSS		1460
@@ -210,6 +213,12 @@
 	u32 ceq_id;
 };
 
+struct l2params_work {
+	struct work_struct work;
+	struct i40iw_device *iwdev;
+	struct i40iw_l2params l2params;
+};
+
 #define I40IW_MSIX_TABLE_SIZE   65
 
 struct virtchnl_work {
@@ -227,6 +236,7 @@
 	struct net_device *netdev;
 	wait_queue_head_t vchnl_waitq;
 	struct i40iw_sc_dev sc_dev;
+	struct i40iw_sc_vsi vsi;
 	struct i40iw_handler *hdl;
 	struct i40e_info *ldev;
 	struct i40e_client *client;
@@ -280,7 +290,6 @@
 	u32 sd_type;
 	struct workqueue_struct *param_wq;
 	atomic_t params_busy;
-	u32 mss;
 	enum init_completion_state init_state;
 	u16 mac_ip_table_idx;
 	atomic_t vchnl_msgs;
@@ -297,6 +306,14 @@
 	u32 mr_stagmask;
 	u32 mpa_version;
 	bool dcb;
+	bool closing;
+	bool reset;
+	u32 used_pds;
+	u32 used_cqs;
+	u32 used_mrs;
+	u32 used_qps;
+	wait_queue_head_t close_wq;
+	atomic64_t use_count;
 };
 
 struct i40iw_ib_device {
@@ -498,7 +515,7 @@
 
 int i40iw_register_rdma_device(struct i40iw_device *iwdev);
 void i40iw_port_ibevent(struct i40iw_device *iwdev);
-int i40iw_cm_disconn(struct i40iw_qp *);
+void i40iw_cm_disconn(struct i40iw_qp *iwqp);
 void i40iw_cm_disconn_worker(void *);
 int mini_cm_recv_pkt(struct i40iw_cm_core *, struct i40iw_device *,
 		     struct sk_buff *);
@@ -508,20 +525,26 @@
 enum i40iw_status_code i40iw_add_mac_addr(struct i40iw_device *iwdev,
 					  u8 *mac_addr, u8 *mac_index);
 int i40iw_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *);
+void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq);
 
 void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev);
 void i40iw_add_pdusecount(struct i40iw_pd *iwpd);
+void i40iw_rem_devusecount(struct i40iw_device *iwdev);
+void i40iw_add_devusecount(struct i40iw_device *iwdev);
 void i40iw_hw_modify_qp(struct i40iw_device *iwdev, struct i40iw_qp *iwqp,
 			struct i40iw_modify_qp_info *info, bool wait);
 
+void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev,
+			     struct i40iw_sc_qp *qp,
+			     bool suspend);
 enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev,
 					  struct i40iw_cm_info *cminfo,
 					  enum i40iw_quad_entry_type etype,
 					  enum i40iw_quad_hash_manage_type mtype,
 					  void *cmnode,
 					  bool wait);
-void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf);
-void i40iw_free_sqbuf(struct i40iw_sc_dev *dev, void *bufp);
+void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf);
+void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp);
 void i40iw_free_qp_resources(struct i40iw_device *iwdev,
 			     struct i40iw_qp *iwqp,
 			     u32 qp_num);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 8563769..95a0586 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -68,13 +68,13 @@
 
 /**
  * i40iw_free_sqbuf - put back puda buffer if refcount = 0
- * @dev: FPK device
+ * @vsi: pointer to vsi structure
  * @buf: puda buffer to free
  */
-void i40iw_free_sqbuf(struct i40iw_sc_dev *dev, void *bufp)
+void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp)
 {
 	struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)bufp;
-	struct i40iw_puda_rsrc *ilq = dev->ilq;
+	struct i40iw_puda_rsrc *ilq = vsi->ilq;
 
 	if (!atomic_dec_return(&buf->refcount))
 		i40iw_puda_ret_bufpool(ilq, buf);
@@ -221,6 +221,7 @@
 	memcpy(cm_info->rem_addr, cm_node->rem_addr, sizeof(cm_info->rem_addr));
 	cm_info->loc_port = cm_node->loc_port;
 	cm_info->rem_port = cm_node->rem_port;
+	cm_info->user_pri = cm_node->user_pri;
 }
 
 /**
@@ -271,6 +272,7 @@
 		event.provider_data = (void *)cm_node;
 		event.private_data = (void *)cm_node->pdata_buf;
 		event.private_data_len = (u8)cm_node->pdata.size;
+		event.ird = cm_node->ird_size;
 		break;
 	case IW_CM_EVENT_CONNECT_REPLY:
 		i40iw_get_cmevent_info(cm_node, cm_id, &event);
@@ -335,13 +337,13 @@
  */
 static void i40iw_free_retrans_entry(struct i40iw_cm_node *cm_node)
 {
-	struct i40iw_sc_dev *dev = cm_node->dev;
+	struct i40iw_device *iwdev = cm_node->iwdev;
 	struct i40iw_timer_entry *send_entry;
 
 	send_entry = cm_node->send_entry;
 	if (send_entry) {
 		cm_node->send_entry = NULL;
-		i40iw_free_sqbuf(dev, (void *)send_entry->sqbuf);
+		i40iw_free_sqbuf(&iwdev->vsi, (void *)send_entry->sqbuf);
 		kfree(send_entry);
 		atomic_dec(&cm_node->ref_count);
 	}
@@ -360,15 +362,6 @@
 	spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
 }
 
-static bool is_remote_ne020_or_chelsio(struct i40iw_cm_node *cm_node)
-{
-	if ((cm_node->rem_mac[0] == 0x0) &&
-	    (((cm_node->rem_mac[1] == 0x12) && (cm_node->rem_mac[2] == 0x55)) ||
-	     ((cm_node->rem_mac[1] == 0x07 && (cm_node->rem_mac[2] == 0x43)))))
-		return true;
-	return false;
-}
-
 /**
  * i40iw_form_cm_frame - get a free packet and build frame
  * @cm_node: connection's node ionfo to use in frame
@@ -384,7 +377,7 @@
 						  u8 flags)
 {
 	struct i40iw_puda_buf *sqbuf;
-	struct i40iw_sc_dev *dev = cm_node->dev;
+	struct i40iw_sc_vsi *vsi = &cm_node->iwdev->vsi;
 	u8 *buf;
 
 	struct tcphdr *tcph;
@@ -396,8 +389,9 @@
 	u32 opts_len = 0;
 	u32 pd_len = 0;
 	u32 hdr_len = 0;
+	u16 vtag;
 
-	sqbuf = i40iw_puda_get_bufpool(dev->ilq);
+	sqbuf = i40iw_puda_get_bufpool(vsi->ilq);
 	if (!sqbuf)
 		return NULL;
 	buf = sqbuf->mem.va;
@@ -408,11 +402,8 @@
 	if (hdr)
 		hdr_len = hdr->size;
 
-	if (pdata) {
+	if (pdata)
 		pd_len = pdata->size;
-		if (!is_remote_ne020_or_chelsio(cm_node))
-			pd_len += MPA_ZERO_PAD_LEN;
-	}
 
 	if (cm_node->vlan_id < VLAN_TAG_PRESENT)
 		eth_hlen += 4;
@@ -445,7 +436,8 @@
 		ether_addr_copy(ethh->h_source, cm_node->loc_mac);
 		if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
 			((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q);
-			((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(cm_node->vlan_id);
+			vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id;
+			((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag);
 
 			((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto = htons(ETH_P_IP);
 		} else {
@@ -454,7 +446,7 @@
 
 		iph->version = IPVERSION;
 		iph->ihl = 5;	/* 5 * 4Byte words, IP headr len */
-		iph->tos = 0;
+		iph->tos = cm_node->tos;
 		iph->tot_len = htons(packetsize);
 		iph->id = htons(++cm_node->tcp_cntxt.loc_id);
 
@@ -474,13 +466,15 @@
 		ether_addr_copy(ethh->h_source, cm_node->loc_mac);
 		if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
 			((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q);
-			((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(cm_node->vlan_id);
+			vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id;
+			((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag);
 			((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto = htons(ETH_P_IPV6);
 		} else {
 			ethh->h_proto = htons(ETH_P_IPV6);
 		}
 		ip6h->version = 6;
-		ip6h->flow_lbl[0] = 0;
+		ip6h->priority = cm_node->tos >> 4;
+		ip6h->flow_lbl[0] = cm_node->tos << 4;
 		ip6h->flow_lbl[1] = 0;
 		ip6h->flow_lbl[2] = 0;
 		ip6h->payload_len = htons(packetsize - sizeof(*ip6h));
@@ -1065,7 +1059,7 @@
 			    int send_retrans,
 			    int close_when_complete)
 {
-	struct i40iw_sc_dev *dev = cm_node->dev;
+	struct i40iw_sc_vsi *vsi = &cm_node->iwdev->vsi;
 	struct i40iw_cm_core *cm_core = cm_node->cm_core;
 	struct i40iw_timer_entry *new_send;
 	int ret = 0;
@@ -1074,7 +1068,7 @@
 
 	new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
 	if (!new_send) {
-		i40iw_free_sqbuf(cm_node->dev, (void *)sqbuf);
+		i40iw_free_sqbuf(vsi, (void *)sqbuf);
 		return -ENOMEM;
 	}
 	new_send->retrycount = I40IW_DEFAULT_RETRYS;
@@ -1089,7 +1083,7 @@
 		new_send->timetosend += (HZ / 10);
 		if (cm_node->close_entry) {
 			kfree(new_send);
-			i40iw_free_sqbuf(cm_node->dev, (void *)sqbuf);
+			i40iw_free_sqbuf(vsi, (void *)sqbuf);
 			i40iw_pr_err("already close entry\n");
 			return -EINVAL;
 		}
@@ -1104,7 +1098,7 @@
 		new_send->timetosend = jiffies + I40IW_RETRY_TIMEOUT;
 
 		atomic_inc(&sqbuf->refcount);
-		i40iw_puda_send_buf(dev->ilq, sqbuf);
+		i40iw_puda_send_buf(vsi->ilq, sqbuf);
 		if (!send_retrans) {
 			i40iw_cleanup_retrans_entry(cm_node);
 			if (close_when_complete)
@@ -1201,6 +1195,7 @@
 	struct i40iw_cm_node *cm_node;
 	struct i40iw_timer_entry *send_entry, *close_entry;
 	struct list_head *list_core_temp;
+	struct i40iw_sc_vsi *vsi;
 	struct list_head *list_node;
 	struct i40iw_cm_core *cm_core = (struct i40iw_cm_core *)pass;
 	u32 settimer = 0;
@@ -1276,9 +1271,10 @@
 		cm_node->cm_core->stats_pkt_retrans++;
 		spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
 
+		vsi = &cm_node->iwdev->vsi;
 		dev = cm_node->dev;
 		atomic_inc(&send_entry->sqbuf->refcount);
-		i40iw_puda_send_buf(dev->ilq, send_entry->sqbuf);
+		i40iw_puda_send_buf(vsi->ilq, send_entry->sqbuf);
 		spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
 		if (send_entry->send_retrans) {
 			send_entry->retranscount--;
@@ -1379,10 +1375,11 @@
 static void i40iw_send_ack(struct i40iw_cm_node *cm_node)
 {
 	struct i40iw_puda_buf *sqbuf;
+	struct i40iw_sc_vsi *vsi = &cm_node->iwdev->vsi;
 
 	sqbuf = i40iw_form_cm_frame(cm_node, NULL, NULL, NULL, SET_ACK);
 	if (sqbuf)
-		i40iw_puda_send_buf(cm_node->dev->ilq, sqbuf);
+		i40iw_puda_send_buf(vsi->ilq, sqbuf);
 	else
 		i40iw_pr_err("no sqbuf\n");
 }
@@ -1564,9 +1561,15 @@
 		memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
 		       sizeof(cm_info->loc_addr));
 		cm_info->vlan_id = child_listen_node->vlan_id;
-		ret = i40iw_manage_qhash(iwdev, cm_info,
-					 I40IW_QHASH_TYPE_TCP_SYN,
-					 I40IW_QHASH_MANAGE_TYPE_DELETE, NULL, false);
+		if (child_listen_node->qhash_set) {
+			ret = i40iw_manage_qhash(iwdev, cm_info,
+						 I40IW_QHASH_TYPE_TCP_SYN,
+						 I40IW_QHASH_MANAGE_TYPE_DELETE,
+						 NULL, false);
+			child_listen_node->qhash_set = false;
+		} else {
+			ret = I40IW_SUCCESS;
+		}
 		i40iw_debug(&iwdev->sc_dev,
 			    I40IW_DEBUG_CM,
 			    "freed pointer = %p\n",
@@ -1591,9 +1594,10 @@
 static struct net_device *i40iw_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac)
 {
 	struct net_device *ip_dev = NULL;
-#if IS_ENABLED(CONFIG_IPV6)
 	struct in6_addr laddr6;
 
+	if (!IS_ENABLED(CONFIG_IPV6))
+		return NULL;
 	i40iw_copy_ip_htonl(laddr6.in6_u.u6_addr32, addr);
 	if (vlan_id)
 		*vlan_id = I40IW_NO_VLAN;
@@ -1610,7 +1614,6 @@
 		}
 	}
 	rcu_read_unlock();
-#endif
 	return ip_dev;
 }
 
@@ -1646,7 +1649,7 @@
 {
 	struct net_device *ip_dev;
 	struct inet6_dev *idev;
-	struct inet6_ifaddr *ifp;
+	struct inet6_ifaddr *ifp, *tmp;
 	enum i40iw_status_code ret = 0;
 	struct i40iw_cm_listener *child_listen_node;
 	unsigned long flags;
@@ -1661,7 +1664,7 @@
 				i40iw_pr_err("idev == NULL\n");
 				break;
 			}
-			list_for_each_entry(ifp, &idev->addr_list, if_list) {
+			list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) {
 				i40iw_debug(&iwdev->sc_dev,
 					    I40IW_DEBUG_CM,
 					    "IP=%pI6, vlan_id=%d, MAC=%pM\n",
@@ -1675,7 +1678,6 @@
 					    "Allocating child listener %p\n",
 					    child_listen_node);
 				if (!child_listen_node) {
-					i40iw_pr_err("listener memory allocation\n");
 					ret = I40IW_ERR_NO_MEMORY;
 					goto exit;
 				}
@@ -1695,6 +1697,7 @@
 							 I40IW_QHASH_MANAGE_TYPE_ADD,
 							 NULL, true);
 				if (!ret) {
+					child_listen_node->qhash_set = true;
 					spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
 					list_add(&child_listen_node->child_listen_list,
 						 &cm_parent_listen_node->child_listen_list);
@@ -1751,7 +1754,6 @@
 					    "Allocating child listener %p\n",
 					    child_listen_node);
 				if (!child_listen_node) {
-					i40iw_pr_err("listener memory allocation\n");
 					in_dev_put(idev);
 					ret = I40IW_ERR_NO_MEMORY;
 					goto exit;
@@ -1773,6 +1775,7 @@
 							 NULL,
 							 true);
 				if (!ret) {
+					child_listen_node->qhash_set = true;
 					spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
 					list_add(&child_listen_node->child_listen_list,
 						 &cm_parent_listen_node->child_listen_list);
@@ -1880,6 +1883,7 @@
 			nfo.loc_port = listener->loc_port;
 			nfo.ipv4 = listener->ipv4;
 			nfo.vlan_id = listener->vlan_id;
+			nfo.user_pri = listener->user_pri;
 
 			if (!list_empty(&listener->child_listen_list)) {
 				i40iw_del_multiple_qhash(listener->iwdev, &nfo, listener);
@@ -2138,6 +2142,20 @@
 	/* set our node specific transport info */
 	cm_node->ipv4 = cm_info->ipv4;
 	cm_node->vlan_id = cm_info->vlan_id;
+	if ((cm_node->vlan_id == I40IW_NO_VLAN) && iwdev->dcb)
+		cm_node->vlan_id = 0;
+	cm_node->tos = cm_info->tos;
+	cm_node->user_pri = cm_info->user_pri;
+	if (listener) {
+		if (listener->tos != cm_info->tos)
+			i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB,
+				    "application TOS[%d] and remote client TOS[%d] mismatch\n",
+				     listener->tos, cm_info->tos);
+		cm_node->tos = max(listener->tos, cm_info->tos);
+		cm_node->user_pri = rt_tos2priority(cm_node->tos);
+		i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, "listener: TOS:[%d] UP:[%d]\n",
+			    cm_node->tos, cm_node->user_pri);
+	}
 	memcpy(cm_node->loc_addr, cm_info->loc_addr, sizeof(cm_node->loc_addr));
 	memcpy(cm_node->rem_addr, cm_info->rem_addr, sizeof(cm_node->rem_addr));
 	cm_node->loc_port = cm_info->loc_port;
@@ -2162,7 +2180,7 @@
 			I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE;
 	ts = current_kernel_time();
 	cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec;
-	cm_node->tcp_cntxt.mss = iwdev->mss;
+	cm_node->tcp_cntxt.mss = iwdev->vsi.mss;
 
 	cm_node->iwdev = iwdev;
 	cm_node->dev = &iwdev->sc_dev;
@@ -2236,7 +2254,7 @@
 		i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true);
 	} else {
 		if (!i40iw_listen_port_in_use(cm_core, cm_node->loc_port) &&
-		    cm_node->apbvt_set && cm_node->iwdev) {
+		    cm_node->apbvt_set) {
 			i40iw_manage_apbvt(cm_node->iwdev,
 					   cm_node->loc_port,
 					   I40IW_MANAGE_APBVT_DEL);
@@ -2861,7 +2879,7 @@
 	/* create a CM connection node */
 	cm_node = i40iw_make_cm_node(cm_core, iwdev, cm_info, NULL);
 	if (!cm_node)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 	/* set our node side to client (active) side */
 	cm_node->tcp_cntxt.client = 1;
 	cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE;
@@ -2874,7 +2892,8 @@
 						cm_node->vlan_id,
 						I40IW_CM_LISTENER_ACTIVE_STATE);
 		if (!loopback_remotelistener) {
-			i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED);
+			i40iw_rem_ref_cm_node(cm_node);
+			return ERR_PTR(-ECONNREFUSED);
 		} else {
 			loopback_cm_info = *cm_info;
 			loopback_cm_info.loc_port = cm_info->rem_port;
@@ -2887,7 +2906,7 @@
 								 loopback_remotelistener);
 			if (!loopback_remotenode) {
 				i40iw_rem_ref_cm_node(cm_node);
-				return NULL;
+				return ERR_PTR(-ENOMEM);
 			}
 			cm_core->stats_loopbacks++;
 			loopback_remotenode->loopbackpartner = cm_node;
@@ -3041,10 +3060,10 @@
 /**
  * i40iw_receive_ilq - recv an ETHERNET packet, and process it
  * through CM
- * @dev: FPK dev struct
+ * @vsi: pointer to the vsi structure
  * @rbuf: receive buffer
  */
-void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf)
+void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf)
 {
 	struct i40iw_cm_node *cm_node;
 	struct i40iw_cm_listener *listener;
@@ -3052,9 +3071,11 @@
 	struct ipv6hdr *ip6h;
 	struct tcphdr *tcph;
 	struct i40iw_cm_info cm_info;
+	struct i40iw_sc_dev *dev = vsi->dev;
 	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
 	struct i40iw_cm_core *cm_core = &iwdev->cm_core;
 	struct vlan_ethhdr *ethh;
+	u16 vtag;
 
 	/* if vlan, then maclen = 18 else 14 */
 	iph = (struct iphdr *)rbuf->iph;
@@ -3068,7 +3089,9 @@
 	ethh = (struct vlan_ethhdr *)rbuf->mem.va;
 
 	if (ethh->h_vlan_proto == htons(ETH_P_8021Q)) {
-		cm_info.vlan_id = ntohs(ethh->h_vlan_TCI) & VLAN_VID_MASK;
+		vtag = ntohs(ethh->h_vlan_TCI);
+		cm_info.user_pri = (vtag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+		cm_info.vlan_id = vtag & VLAN_VID_MASK;
 		i40iw_debug(cm_core->dev,
 			    I40IW_DEBUG_CM,
 			    "%s vlan_id=%d\n",
@@ -3083,6 +3106,7 @@
 		cm_info.loc_addr[0] = ntohl(iph->daddr);
 		cm_info.rem_addr[0] = ntohl(iph->saddr);
 		cm_info.ipv4 = true;
+		cm_info.tos = iph->tos;
 	} else {
 		ip6h = (struct ipv6hdr *)rbuf->iph;
 		i40iw_copy_ip_ntohl(cm_info.loc_addr,
@@ -3090,6 +3114,7 @@
 		i40iw_copy_ip_ntohl(cm_info.rem_addr,
 				    ip6h->saddr.in6_u.u6_addr32);
 		cm_info.ipv4 = false;
+		cm_info.tos = (ip6h->priority << 4) | (ip6h->flow_lbl[0] >> 4);
 	}
 	cm_info.loc_port = ntohs(tcph->dest);
 	cm_info.rem_port = ntohs(tcph->source);
@@ -3309,6 +3334,8 @@
 
 	ctx_info->tcp_info_valid = true;
 	ctx_info->iwarp_info_valid = true;
+	ctx_info->add_to_qoslist = true;
+	ctx_info->user_pri = cm_node->user_pri;
 
 	i40iw_init_tcp_ctx(cm_node, &tcp_info, iwqp);
 	if (cm_node->snd_mark_en) {
@@ -3320,33 +3347,47 @@
 	cm_node->state = I40IW_CM_STATE_OFFLOADED;
 	tcp_info.tcp_state = I40IW_TCP_STATE_ESTABLISHED;
 	tcp_info.src_mac_addr_idx = iwdev->mac_ip_table_idx;
+	tcp_info.tos = cm_node->tos;
 
 	dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp, (u64 *)(iwqp->host_ctx.va), ctx_info);
 
 	/* once tcp_info is set, no need to do it again */
 	ctx_info->tcp_info_valid = false;
 	ctx_info->iwarp_info_valid = false;
+	ctx_info->add_to_qoslist = false;
 }
 
 /**
  * i40iw_cm_disconn - when a connection is being closed
  * @iwqp: associate qp for the connection
  */
-int i40iw_cm_disconn(struct i40iw_qp *iwqp)
+void i40iw_cm_disconn(struct i40iw_qp *iwqp)
 {
 	struct disconn_work *work;
 	struct i40iw_device *iwdev = iwqp->iwdev;
 	struct i40iw_cm_core *cm_core = &iwdev->cm_core;
+	unsigned long flags;
 
 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
 	if (!work)
-		return -ENOMEM;	/* Timer will clean up */
+		return;	/* Timer will clean up */
 
+	spin_lock_irqsave(&iwdev->qptable_lock, flags);
+	if (!iwdev->qp_table[iwqp->ibqp.qp_num]) {
+		spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
+		i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM,
+			    "%s qp_id %d is already freed\n",
+			     __func__, iwqp->ibqp.qp_num);
+		kfree(work);
+		return;
+	}
 	i40iw_add_ref(&iwqp->ibqp);
+	spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
+
 	work->iwqp = iwqp;
 	INIT_WORK(&work->work, i40iw_disconnect_worker);
 	queue_work(cm_core->disconn_wq, &work->work);
-	return 0;
+	return;
 }
 
 /**
@@ -3432,7 +3473,7 @@
 		 *terminate-handler to issue cm_disconn which can re-free
 		 *a QP even after its refcnt=0.
 		 */
-		del_timer(&iwqp->terminate_timer);
+		i40iw_terminate_del_timer(qp);
 		if (!iwqp->flush_issued) {
 			iwqp->flush_issued = 1;
 			issue_flush = 1;
@@ -3462,7 +3503,7 @@
 		/* Flush the queues */
 		i40iw_flush_wqes(iwdev, iwqp);
 
-		if (qp->term_flags) {
+		if (qp->term_flags && iwqp->ibqp.event_handler) {
 			ibevent.device = iwqp->ibqp.device;
 			ibevent.event = (qp->eventtype == TERM_EVENT_QP_FATAL) ?
 					IB_EVENT_QP_FATAL : IB_EVENT_QP_ACCESS_ERR;
@@ -3571,7 +3612,7 @@
 	iwqp->cm_node = (void *)cm_node;
 	cm_node->iwqp = iwqp;
 
-	buf_len = conn_param->private_data_len + I40IW_MAX_IETF_SIZE + MPA_ZERO_PAD_LEN;
+	buf_len = conn_param->private_data_len + I40IW_MAX_IETF_SIZE;
 
 	status = i40iw_allocate_dma_mem(dev->hw, &iwqp->ietf_mem, buf_len, 1);
 
@@ -3605,18 +3646,10 @@
 		iwqp->lsmm_mr = ibmr;
 		if (iwqp->page)
 			iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
-		if (is_remote_ne020_or_chelsio(cm_node))
-			dev->iw_priv_qp_ops->qp_send_lsmm(
-							&iwqp->sc_qp,
+		dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp,
 							iwqp->ietf_mem.va,
 							(accept.size + conn_param->private_data_len),
 							ibmr->lkey);
-		else
-			dev->iw_priv_qp_ops->qp_send_lsmm(
-							&iwqp->sc_qp,
-							iwqp->ietf_mem.va,
-							(accept.size + conn_param->private_data_len + MPA_ZERO_PAD_LEN),
-							ibmr->lkey);
 
 	} else {
 		if (iwqp->page)
@@ -3714,6 +3747,7 @@
 	struct sockaddr_in6 *raddr6;
 	bool qhash_set = false;
 	int apbvt_set = 0;
+	int err = 0;
 	enum i40iw_status_code status;
 
 	ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn);
@@ -3759,6 +3793,10 @@
 		i40iw_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id, NULL);
 	}
 	cm_info.cm_id = cm_id;
+	cm_info.tos = cm_id->tos;
+	cm_info.user_pri = rt_tos2priority(cm_id->tos);
+	i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, "%s TOS:[%d] UP:[%d]\n",
+		    __func__, cm_id->tos, cm_info.user_pri);
 	if ((cm_info.ipv4 && (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr)) ||
 	    (!cm_info.ipv4 && memcmp(laddr6->sin6_addr.in6_u.u6_addr32,
 				     raddr6->sin6_addr.in6_u.u6_addr32,
@@ -3790,8 +3828,11 @@
 				       conn_param->private_data_len,
 				       (void *)conn_param->private_data,
 				       &cm_info);
-	if (!cm_node)
-		goto err;
+
+	if (IS_ERR(cm_node)) {
+		err = PTR_ERR(cm_node);
+		goto err_out;
+	}
 
 	i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
 	if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
@@ -3805,10 +3846,12 @@
 	iwqp->cm_id = cm_id;
 	i40iw_add_ref(&iwqp->ibqp);
 
-	if (cm_node->state == I40IW_CM_STATE_SYN_SENT) {
-		if (i40iw_send_syn(cm_node, 0)) {
+	if (cm_node->state != I40IW_CM_STATE_OFFLOADED) {
+		cm_node->state = I40IW_CM_STATE_SYN_SENT;
+		err = i40iw_send_syn(cm_node, 0);
+		if (err) {
 			i40iw_rem_ref_cm_node(cm_node);
-			goto err;
+			goto err_out;
 		}
 	}
 
@@ -3820,24 +3863,25 @@
 		    cm_node->cm_id);
 	return 0;
 
-err:
-	if (cm_node) {
-		if (cm_node->ipv4)
-			i40iw_debug(cm_node->dev,
-				    I40IW_DEBUG_CM,
-				    "Api - connect() FAILED: dest addr=%pI4",
-				    cm_node->rem_addr);
-		else
-			i40iw_debug(cm_node->dev, I40IW_DEBUG_CM,
-				    "Api - connect() FAILED: dest addr=%pI6",
-				    cm_node->rem_addr);
-	}
-	i40iw_manage_qhash(iwdev,
-			   &cm_info,
-			   I40IW_QHASH_TYPE_TCP_ESTABLISHED,
-			   I40IW_QHASH_MANAGE_TYPE_DELETE,
-			   NULL,
-			   false);
+err_out:
+	if (cm_info.ipv4)
+		i40iw_debug(&iwdev->sc_dev,
+			    I40IW_DEBUG_CM,
+			    "Api - connect() FAILED: dest addr=%pI4",
+			    cm_info.rem_addr);
+	else
+		i40iw_debug(&iwdev->sc_dev,
+			    I40IW_DEBUG_CM,
+			    "Api - connect() FAILED: dest addr=%pI6",
+			    cm_info.rem_addr);
+
+	if (qhash_set)
+		i40iw_manage_qhash(iwdev,
+				   &cm_info,
+				   I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+				   I40IW_QHASH_MANAGE_TYPE_DELETE,
+				   NULL,
+				   false);
 
 	if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core,
 						   cm_info.loc_port))
@@ -3846,7 +3890,7 @@
 				   I40IW_MANAGE_APBVT_DEL);
 	cm_id->rem_ref(cm_id);
 	iwdev->cm_core.stats_connect_errs++;
-	return -ENOMEM;
+	return err;
 }
 
 /**
@@ -3904,6 +3948,10 @@
 
 	cm_id->provider_data = cm_listen_node;
 
+	cm_listen_node->tos = cm_id->tos;
+	cm_listen_node->user_pri = rt_tos2priority(cm_id->tos);
+	cm_info.user_pri = cm_listen_node->user_pri;
+
 	if (!cm_listen_node->reused_node) {
 		if (wildcard) {
 			if (cm_info.ipv4)
@@ -4124,3 +4172,158 @@
 
 	queue_work(event->cm_node->cm_core->event_wq, &event->event_work);
 }
+
+/**
+ * i40iw_qhash_ctrl - enable/disable qhash for list
+ * @iwdev: device pointer
+ * @parent_listen_node: parent listen node
+ * @nfo: cm info; loc_addr and vlan_id are overwritten from the child node
+ * @ipaddr: Pointer to IPv4 or IPv6 address
+ * @ipv4: flag indicating IPv4 when true
+ * @ifup: flag indicating interface up when true
+ *
+ * Enables or disables the qhash for the node in the child
+ * listen list that matches ipaddr. If no matching IP was found
+ * and the interface is going up, a new child listen node is
+ * allocated and added to the parent listen node once its qhash
+ * is set. The listen_list_lock is assumed to be held when called.
+ */
+static void i40iw_qhash_ctrl(struct i40iw_device *iwdev,
+			     struct i40iw_cm_listener *parent_listen_node,
+			     struct i40iw_cm_info *nfo,
+			     u32 *ipaddr, bool ipv4, bool ifup)
+{
+	struct list_head *child_listen_list = &parent_listen_node->child_listen_list;
+	struct i40iw_cm_listener *child_listen_node;
+	struct list_head *pos, *tpos;
+	enum i40iw_status_code ret;
+	bool node_allocated = false;
+	enum i40iw_quad_hash_manage_type op =
+		ifup ? I40IW_QHASH_MANAGE_TYPE_ADD : I40IW_QHASH_MANAGE_TYPE_DELETE;
+
+	list_for_each_safe(pos, tpos, child_listen_list) {
+		child_listen_node =
+			list_entry(pos,
+				   struct i40iw_cm_listener,
+				   child_listen_list);
+		if (!memcmp(child_listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16))
+			goto set_qhash;
+	}
+
+	/* no match: a child listener is only created when the interface goes up */
+	if (!ifup)
+		return;
+	child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_ATOMIC);
+	if (!child_listen_node)
+		return;
+	node_allocated = true;
+	memcpy(child_listen_node, parent_listen_node, sizeof(*child_listen_node));
+
+	memcpy(child_listen_node->loc_addr, ipaddr,  ipv4 ? 4 : 16);
+
+set_qhash:
+	memcpy(nfo->loc_addr,
+	       child_listen_node->loc_addr,
+	       sizeof(nfo->loc_addr));
+	nfo->vlan_id = child_listen_node->vlan_id;
+	ret = i40iw_manage_qhash(iwdev, nfo,
+				 I40IW_QHASH_TYPE_TCP_SYN,
+				 op,
+				 NULL, false);
+	if (!ret) {
+		child_listen_node->qhash_set = ifup;
+		if (node_allocated)
+			list_add(&child_listen_node->child_listen_list,
+				 &parent_listen_node->child_listen_list);
+	} else if (node_allocated) {
+		kfree(child_listen_node);
+	}
+}
+
+/**
+ * i40iw_cm_disconnect_all - move the qp of every connected node to error state
+ * @iwdev: device pointer
+ */
+void i40iw_cm_disconnect_all(struct i40iw_device *iwdev)
+{
+	struct i40iw_cm_core *cm_core = &iwdev->cm_core;
+	struct list_head *list_core_temp;
+	struct list_head *list_node;
+	struct i40iw_cm_node *cm_node;
+	unsigned long flags;
+	struct list_head connected_list;
+	struct ib_qp_attr attr;
+
+	INIT_LIST_HEAD(&connected_list);
+	spin_lock_irqsave(&cm_core->ht_lock, flags);
+	list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) {
+		cm_node = container_of(list_node, struct i40iw_cm_node, list);
+		atomic_inc(&cm_node->ref_count);
+		list_add(&cm_node->connected_entry, &connected_list);
+	}
+	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+	list_for_each_safe(list_node, list_core_temp, &connected_list) {
+		cm_node = container_of(list_node, struct i40iw_cm_node, connected_entry);
+		attr.qp_state = IB_QPS_ERR;
+		i40iw_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+		i40iw_rem_ref_cm_node(cm_node);
+	}
+}
+
+/**
+ * i40iw_if_notify - process an ifup or ifdown event on an interface
+ * @iwdev: device pointer
+ * @ipaddr: Pointer to IPv4 or IPv6 address
+ * @ipv4: flag indicating IPv4 when true
+ * @ifup: flag indicating interface up when true
+ */
+void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
+		     u32 *ipaddr, bool ipv4, bool ifup)
+{
+	struct i40iw_cm_core *cm_core = &iwdev->cm_core;
+	unsigned long flags;
+	struct i40iw_cm_listener *listen_node;
+	static const u32 ip_zero[4] = { 0, 0, 0, 0 };
+	struct i40iw_cm_info nfo;
+	u16 vlan_id = rdma_vlan_dev_vlan_id(netdev);
+	enum i40iw_status_code ret;
+	enum i40iw_quad_hash_manage_type op =
+		ifup ? I40IW_QHASH_MANAGE_TYPE_ADD : I40IW_QHASH_MANAGE_TYPE_DELETE;
+
+	/* Enable (ifup) or disable (ifdown) qhash for matching listeners */
+	spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+	list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
+		if (vlan_id == listen_node->vlan_id &&
+		    (!memcmp(listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16) ||
+		    !memcmp(listen_node->loc_addr, ip_zero, ipv4 ? 4 : 16))) {
+			memcpy(nfo.loc_addr, listen_node->loc_addr,
+			       sizeof(nfo.loc_addr));
+			nfo.loc_port = listen_node->loc_port;
+			nfo.ipv4 = listen_node->ipv4;
+			nfo.vlan_id = listen_node->vlan_id;
+			nfo.user_pri = listen_node->user_pri;
+			if (!list_empty(&listen_node->child_listen_list)) {
+				i40iw_qhash_ctrl(iwdev,
+						 listen_node,
+						 &nfo,
+						 ipaddr, ipv4, ifup);
+			} else if (memcmp(listen_node->loc_addr, ip_zero,
+					  ipv4 ? 4 : 16)) {
+				ret = i40iw_manage_qhash(iwdev,
+							 &nfo,
+							 I40IW_QHASH_TYPE_TCP_SYN,
+							 op,
+							 NULL,
+							 false);
+				if (!ret)
+					listen_node->qhash_set = ifup;
+			}
+		}
+	}
+	spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
+	/* disconnect any connected qp's on ifdown */
+	if (!ifup)
+		i40iw_cm_disconnect_all(iwdev);
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h
index e9046d9..2e52e38 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h
@@ -56,8 +56,6 @@
 
 #define I40IW_MAX_IETF_SIZE      32
 
-#define MPA_ZERO_PAD_LEN	4
-
 /* IETF RTR MSG Fields               */
 #define IETF_PEER_TO_PEER       0x8000
 #define IETF_FLPDU_ZERO_LEN     0x4000
@@ -299,6 +297,7 @@
 	enum i40iw_cm_listener_state listener_state;
 	u32 reused_node;
 	u8 user_pri;
+	u8 tos;
 	u16 vlan_id;
 	bool qhash_set;
 	bool ipv4;
@@ -341,9 +340,11 @@
 	int accept_pend;
 	struct list_head timer_entry;
 	struct list_head reset_entry;
+	struct list_head connected_entry;
 	atomic_t passive_state;
 	bool qhash_set;
 	u8 user_pri;
+	u8 tos;
 	bool ipv4;
 	bool snd_mark_en;
 	u16 lsmm_size;
@@ -368,7 +369,8 @@
 	u32 rem_addr[4];
 	u16 vlan_id;
 	int backlog;
-	u16 user_pri;
+	u8 user_pri;
+	u8 tos;
 	bool ipv4;
 };
 
@@ -445,4 +447,7 @@
 		    u8 *mac_addr,
 		    u32 action);
 
+void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
+		     u32 *ipaddr, bool ipv4, bool ifup);
+void i40iw_cm_disconnect_all(struct i40iw_device *iwdev);
 #endif /* I40IW_CM_H */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
index 2c4b4d0..392f783 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
@@ -103,6 +103,7 @@
 		if (newtail != tail) {
 			/* SUCCESS */
 			I40IW_RING_MOVE_TAIL(cqp->sq_ring);
+			cqp->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS]++;
 			return 0;
 		}
 		udelay(I40IW_SLEEP_COUNT);
@@ -223,6 +224,136 @@
 }
 
 /**
+ * i40iw_fill_qos_list - Change all unknown qs handles to available ones
+ * @qs_list: qs_handles; an unknown entry inherits the value of the preceding entry
+ */
+static void i40iw_fill_qos_list(u16 *qs_list)
+{
+	u16 qshandle = qs_list[0];
+	int i;
+
+	for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) {
+		if (qs_list[i] == QS_HANDLE_UNKNOWN)
+			qs_list[i] = qshandle;
+		else
+			qshandle = qs_list[i];
+	}
+}
+
+/**
+ * i40iw_qp_from_entry - Given entry, get to the qp structure
+ * @entry: Points to the list member embedded in a qp structure, may be NULL
+ */
+static struct i40iw_sc_qp *i40iw_qp_from_entry(struct list_head *entry)
+{
+	if (!entry)
+		return NULL;
+
+	return (struct i40iw_sc_qp *)((char *)entry - offsetof(struct i40iw_sc_qp, list));
+}
+
+/**
+ * i40iw_get_qp - get the next qp from the list given current qp
+ * @head: Listhead of qp's
+ * @qp: current qp, or NULL to start from the first qp on the list
+ *
+ * Return: the qp after @qp, or NULL once the end of the list is reached
+ */
+static struct i40iw_sc_qp *i40iw_get_qp(struct list_head *head, struct i40iw_sc_qp *qp)
+{
+	struct list_head *entry;
+
+	if (list_empty(head))
+		return NULL;
+
+	/* no current qp means the walk starts at the first entry */
+	entry = qp ? qp->list.next : head->next;
+	/* wrapping around to @head means every qp has been visited */
+	if (entry == head)
+		return NULL;
+
+	return i40iw_qp_from_entry(entry);
+}
+
+/**
+ * i40iw_change_l2params - given the new l2 parameters, change all qp
+ * @vsi: pointer to the vsi structure
+ * @l2params: New parameters from l2
+ */
+void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2params)
+{
+	struct i40iw_sc_dev *dev = vsi->dev;
+	struct i40iw_sc_qp *qp = NULL;
+	bool qs_handle_change;
+	bool mss_change = false;
+	unsigned long flags;
+	u16 qs_handle;
+	int i;
+
+	if (vsi->mss != l2params->mss) {
+		mss_change = true;
+		vsi->mss = l2params->mss;
+	}
+
+	i40iw_fill_qos_list(l2params->qs_handle_list);
+	for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) {
+		qs_handle = l2params->qs_handle_list[i];
+		/* decided per priority so an earlier change does not leak in */
+		qs_handle_change = (vsi->qos[i].qs_handle != qs_handle);
+		if (!qs_handle_change && !mss_change)
+			continue;       /* no MSS nor qs handle change */
+		spin_lock_irqsave(&vsi->qos[i].lock, flags);
+		qp = i40iw_get_qp(&vsi->qos[i].qplist, qp);
+		while (qp) {
+			if (mss_change)
+				i40iw_qp_mss_modify(dev, qp);
+			if (qs_handle_change) {
+				qp->qs_handle = qs_handle;
+				/* issue cqp suspend command */
+				i40iw_qp_suspend_resume(dev, qp, true);
+			}
+			qp = i40iw_get_qp(&vsi->qos[i].qplist, qp);
+		}
+		spin_unlock_irqrestore(&vsi->qos[i].lock, flags);
+		vsi->qos[i].qs_handle = qs_handle;
+	}
+}
+
+/**
+ * i40iw_qp_rem_qos - remove qp from qos lists during destroy qp
+ * @qp: qp to be removed from qos; nothing is done if it is not on a list
+ */
+static void i40iw_qp_rem_qos(struct i40iw_sc_qp *qp)
+{
+	unsigned long flags;
+
+	if (!qp->on_qoslist)
+		return;
+	spin_lock_irqsave(&qp->vsi->qos[qp->user_pri].lock, flags);
+	list_del(&qp->list);
+	qp->on_qoslist = false;	/* a repeated remove must not list_del() again */
+	spin_unlock_irqrestore(&qp->vsi->qos[qp->user_pri].lock, flags);
+}
+
+/**
+ * i40iw_qp_add_qos - called during setctx for qp to be added to qos
+ * @qp: qp to be added to qos; nothing is done if it is already on a list
+ */
+void i40iw_qp_add_qos(struct i40iw_sc_qp *qp)
+{
+	struct i40iw_sc_vsi *vsi = qp->vsi;
+	unsigned long flags;
+
+	if (qp->on_qoslist)
+		return;
+	spin_lock_irqsave(&vsi->qos[qp->user_pri].lock, flags);
+	qp->qs_handle = vsi->qos[qp->user_pri].qs_handle;
+	list_add(&qp->list, &vsi->qos[qp->user_pri].qplist);
+	qp->on_qoslist = true;
+	spin_unlock_irqrestore(&vsi->qos[qp->user_pri].lock, flags);
+}
+
+/**
  * i40iw_sc_pd_init - initialize sc pd struct
  * @dev: sc device struct
  * @pd: sc pd ptr
@@ -292,6 +423,9 @@
 	info->dev->cqp = cqp;
 
 	I40IW_RING_INIT(cqp->sq_ring, cqp->sq_size);
+	cqp->dev->cqp_cmd_stats[OP_REQUESTED_COMMANDS] = 0;
+	cqp->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS] = 0;
+
 	i40iw_debug(cqp->dev, I40IW_DEBUG_WQE,
 		    "%s: sq_size[%04d] hw_sq_size[%04d] sq_base[%p] sq_pa[%llxh] cqp[%p] polarity[x%04X]\n",
 		    __func__, cqp->sq_size, cqp->hw_sq_size,
@@ -302,12 +436,10 @@
 /**
  * i40iw_sc_cqp_create - create cqp during bringup
  * @cqp: struct for cqp hw
- * @disable_pfpdus: if pfpdu to be disabled
  * @maj_err: If error, major err number
  * @min_err: If error, minor err number
  */
 static enum i40iw_status_code i40iw_sc_cqp_create(struct i40iw_sc_cqp *cqp,
-						  bool disable_pfpdus,
 						  u16 *maj_err,
 						  u16 *min_err)
 {
@@ -326,9 +458,6 @@
 	temp = LS_64(cqp->hw_sq_size, I40IW_CQPHC_SQSIZE) |
 	       LS_64(cqp->struct_ver, I40IW_CQPHC_SVER);
 
-	if (disable_pfpdus)
-		temp |= LS_64(1, I40IW_CQPHC_DISABLE_PFPDUS);
-
 	set_64bit_val(cqp->host_ctx, 0, temp);
 	set_64bit_val(cqp->host_ctx, 8, cqp->sq_pa);
 	temp = LS_64(cqp->enabled_vf_count, I40IW_CQPHC_ENABLED_VFS) |
@@ -424,6 +553,7 @@
 		return NULL;
 	}
 	I40IW_ATOMIC_RING_MOVE_HEAD(cqp->sq_ring, wqe_idx, ret_code);
+	cqp->dev->cqp_cmd_stats[OP_REQUESTED_COMMANDS]++;
 	if (ret_code)
 		return NULL;
 	if (!wqe_idx)
@@ -559,6 +689,8 @@
 		      I40IW_RING_GETCURRENT_HEAD(ccq->cq_uk.cq_ring));
 	wmb(); /* write shadow area before tail */
 	I40IW_RING_MOVE_TAIL(cqp->sq_ring);
+	ccq->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS]++;
+
 	return ret_code;
 }
 
@@ -1051,6 +1183,7 @@
 	u64 qw1 = 0;
 	u64 qw2 = 0;
 	u64 temp;
+	struct i40iw_sc_vsi *vsi = info->vsi;
 
 	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
 	if (!wqe)
@@ -1082,7 +1215,7 @@
 			      LS_64(info->dest_ip[2], I40IW_CQPSQ_QHASH_ADDR2) |
 			      LS_64(info->dest_ip[3], I40IW_CQPSQ_QHASH_ADDR3));
 	}
-	qw2 = LS_64(cqp->dev->qs_handle, I40IW_CQPSQ_QHASH_QS_HANDLE);
+	qw2 = LS_64(vsi->qos[info->user_pri].qs_handle, I40IW_CQPSQ_QHASH_QS_HANDLE);
 	if (info->vlan_valid)
 		qw2 |= LS_64(info->vlan_id, I40IW_CQPSQ_QHASH_VLANID);
 	set_64bit_val(wqe, 16, qw2);
@@ -2103,6 +2236,7 @@
 	u32 offset;
 
 	qp->dev = info->pd->dev;
+	qp->vsi = info->vsi;
 	qp->sq_pa = info->sq_pa;
 	qp->rq_pa = info->rq_pa;
 	qp->hw_host_ctx_pa = info->host_ctx_pa;
@@ -2151,7 +2285,7 @@
 	qp->rq_tph_en = info->rq_tph_en;
 	qp->rcv_tph_en = info->rcv_tph_en;
 	qp->xmit_tph_en = info->xmit_tph_en;
-	qp->qs_handle = qp->pd->dev->qs_handle;
+	qp->qs_handle = qp->vsi->qos[qp->user_pri].qs_handle;
 	qp->exception_lan_queue = qp->pd->dev->exception_lan_queue;
 
 	return 0;
@@ -2296,6 +2430,7 @@
 	struct i40iw_sc_cqp *cqp;
 	u64 header;
 
+	i40iw_qp_rem_qos(qp);
 	cqp = qp->pd->dev->cqp;
 	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
 	if (!wqe)
@@ -2443,10 +2578,20 @@
 {
 	struct i40iwarp_offload_info *iw;
 	struct i40iw_tcp_offload_info *tcp;
+	struct i40iw_sc_vsi *vsi;
+	struct i40iw_sc_dev *dev;
 	u64 qw0, qw3, qw7 = 0;
 
 	iw = info->iwarp_info;
 	tcp = info->tcp_info;
+	vsi = qp->vsi;
+	dev = qp->dev;
+	if (info->add_to_qoslist) {
+		qp->user_pri = info->user_pri;
+		i40iw_qp_add_qos(qp);
+		i40iw_debug(qp->dev, I40IW_DEBUG_DCB, "%s qp[%d] UP[%d] qset[%d]\n",
+			    __func__, qp->qp_uk.qp_id, qp->user_pri, qp->qs_handle);
+	}
 	qw0 = LS_64(qp->qp_uk.rq_wqe_size, I40IWQPC_RQWQESIZE) |
 	      LS_64(info->err_rq_idx_valid, I40IWQPC_ERR_RQ_IDX_VALID) |
 	      LS_64(qp->rcv_tph_en, I40IWQPC_RCVTPHEN) |
@@ -2487,16 +2632,14 @@
 		       LS_64(iw->rdmap_ver, I40IWQPC_RDMAP_VER);
 
 		qw7 |= LS_64(iw->pd_id, I40IWQPC_PDIDX);
-		set_64bit_val(qp_ctx, 144, qp->q2_pa);
+		set_64bit_val(qp_ctx,
+			      144,
+			      LS_64(qp->q2_pa, I40IWQPC_Q2ADDR) |
+			      LS_64(vsi->fcn_id, I40IWQPC_STAT_INDEX));
 		set_64bit_val(qp_ctx,
 			      152,
 			      LS_64(iw->last_byte_sent, I40IWQPC_LASTBYTESENT));
 
-		/*
-		* Hard-code IRD_SIZE to hw-limit, 128, in qpctx, i.e matching an
-		*advertisable IRD of 64
-		*/
-		iw->ird_size = I40IW_QPCTX_ENCD_MAXIRD;
 		set_64bit_val(qp_ctx,
 			      160,
 			      LS_64(iw->ord_size, I40IWQPC_ORDSIZE) |
@@ -2507,6 +2650,9 @@
 			      LS_64(iw->bind_en, I40IWQPC_BINDEN) |
 			      LS_64(iw->fast_reg_en, I40IWQPC_FASTREGEN) |
 			      LS_64(iw->priv_mode_en, I40IWQPC_PRIVEN) |
+			      LS_64((((vsi->stats_fcn_id_alloc) &&
+				      (dev->is_pf) && (vsi->fcn_id >= I40IW_FIRST_NON_PF_STAT)) ? 1 : 0),
+				    I40IWQPC_USESTATSINSTANCE) |
 			      LS_64(1, I40IWQPC_IWARPMODE) |
 			      LS_64(iw->rcv_mark_en, I40IWQPC_RCVMARKERS) |
 			      LS_64(iw->align_hdrs, I40IWQPC_ALIGNHDRS) |
@@ -2623,7 +2769,9 @@
 	u64 *wqe;
 	struct i40iw_sc_cqp *cqp;
 	u64 header;
+	enum i40iw_page_size page_size;
 
+	page_size = (info->page_size == 0x200000) ? I40IW_PAGE_SIZE_2M : I40IW_PAGE_SIZE_4K;
 	cqp = dev->cqp;
 	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
 	if (!wqe)
@@ -2643,7 +2791,7 @@
 		 LS_64(1, I40IW_CQPSQ_STAG_MR) |
 		 LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) |
 		 LS_64(info->chunk_size, I40IW_CQPSQ_STAG_LPBLSIZE) |
-		 LS_64(info->page_size, I40IW_CQPSQ_STAG_HPAGESIZE) |
+		 LS_64(page_size, I40IW_CQPSQ_STAG_HPAGESIZE) |
 		 LS_64(info->remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) |
 		 LS_64(info->use_hmc_fcn_index, I40IW_CQPSQ_STAG_USEHMCFNIDX) |
 		 LS_64(info->use_pf_rid, I40IW_CQPSQ_STAG_USEPFRID) |
@@ -2679,7 +2827,9 @@
 	u32 pble_obj_cnt;
 	bool remote_access;
 	u8 addr_type;
+	enum i40iw_page_size page_size;
 
+	page_size = (info->page_size == 0x200000) ? I40IW_PAGE_SIZE_2M : I40IW_PAGE_SIZE_4K;
 	if (info->access_rights & (I40IW_ACCESS_FLAGS_REMOTEREAD_ONLY |
 				   I40IW_ACCESS_FLAGS_REMOTEWRITE_ONLY))
 		remote_access = true;
@@ -2722,7 +2872,7 @@
 	header = LS_64(I40IW_CQP_OP_REG_MR, I40IW_CQPSQ_OPCODE) |
 		 LS_64(1, I40IW_CQPSQ_STAG_MR) |
 		 LS_64(info->chunk_size, I40IW_CQPSQ_STAG_LPBLSIZE) |
-		 LS_64(info->page_size, I40IW_CQPSQ_STAG_HPAGESIZE) |
+		 LS_64(page_size, I40IW_CQPSQ_STAG_HPAGESIZE) |
 		 LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) |
 		 LS_64(remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) |
 		 LS_64(addr_type, I40IW_CQPSQ_STAG_VABASEDTO) |
@@ -2937,7 +3087,9 @@
 	u64 temp, header;
 	u64 *wqe;
 	u32 wqe_idx;
+	enum i40iw_page_size page_size;
 
+	page_size = (info->page_size == 0x200000) ? I40IW_PAGE_SIZE_2M : I40IW_PAGE_SIZE_4K;
 	wqe = i40iw_qp_get_next_send_wqe(&qp->qp_uk, &wqe_idx, I40IW_QP_WQE_MIN_SIZE,
 					 0, info->wr_id);
 	if (!wqe)
@@ -2964,7 +3116,7 @@
 		 LS_64(info->stag_idx, I40IWQPSQ_STAGINDEX) |
 		 LS_64(I40IWQP_OP_FAST_REGISTER, I40IWQPSQ_OPCODE) |
 		 LS_64(info->chunk_size, I40IWQPSQ_LPBLSIZE) |
-		 LS_64(info->page_size, I40IWQPSQ_HPAGESIZE) |
+		 LS_64(page_size, I40IWQPSQ_HPAGESIZE) |
 		 LS_64(info->access_rights, I40IWQPSQ_STAGRIGHTS) |
 		 LS_64(info->addr_type, I40IWQPSQ_VABASEDTO) |
 		 LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
@@ -3959,7 +4111,7 @@
 					     struct cqp_commands_info *pcmdinfo)
 {
 	enum i40iw_status_code status = 0;
-	unsigned long	flags;
+	unsigned long flags;
 
 	spin_lock_irqsave(&dev->cqp_lock, flags);
 	if (list_empty(&dev->cqp_cmd_head) && !i40iw_ring_full(dev->cqp))
@@ -3978,7 +4130,7 @@
 {
 	enum i40iw_status_code status = 0;
 	struct cqp_commands_info *pcmdinfo;
-	unsigned long	flags;
+	unsigned long flags;
 
 	spin_lock_irqsave(&dev->cqp_lock, flags);
 	while (!list_empty(&dev->cqp_cmd_head) && !i40iw_ring_full(dev->cqp)) {
@@ -4055,7 +4207,6 @@
 	u16 ddp_seg_len;
 	int copy_len = 0;
 	u8 is_tagged = 0;
-	enum i40iw_flush_opcode flush_code = FLUSH_INVALID;
 	u32 opcode;
 	struct i40iw_terminate_hdr *termhdr;
 
@@ -4228,9 +4379,6 @@
 	if (copy_len)
 		memcpy(termhdr + 1, pkt, copy_len);
 
-	if (flush_code && !info->in_rdrsp_wr)
-		qp->sq_flush = (info->sq) ? true : false;
-
 	return sizeof(struct i40iw_terminate_hdr) + copy_len;
 }
 
@@ -4321,286 +4469,370 @@
 }
 
 /**
- * i40iw_hw_stat_init - Initiliaze HW stats table
- * @devstat: pestat struct
+ * i40iw_sc_vsi_init - Initialize virtual device
+ * @vsi: pointer to the vsi structure
+ * @info: parameters to initialize vsi
+ **/
+void i40iw_sc_vsi_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_init_info *info)
+{
+	int i;
+
+	vsi->dev = info->dev;
+	vsi->back_vsi = info->back_vsi;
+	vsi->mss = info->params->mss;
+	i40iw_fill_qos_list(info->params->qs_handle_list);
+
+	for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) {
+		vsi->qos[i].qs_handle =
+			info->params->qs_handle_list[i];
+			i40iw_debug(vsi->dev, I40IW_DEBUG_DCB, "qset[%d]: %d\n", i, vsi->qos[i].qs_handle);
+		spin_lock_init(&vsi->qos[i].lock);
+		INIT_LIST_HEAD(&vsi->qos[i].qplist);
+	}
+}
+
+/**
+ * i40iw_hw_stats_init - Initialize HW stats table
+ * @stats: pestat struct
  * @fcn_idx: PCI fn id
- * @hw: PF i40iw_hw structure.
  * @is_pf: Is it a PF?
  *
- * Populate the HW stat table with register offset addr for each
- * stat. And start the perioidic stats timer.
+ * Populate the HW stats table with register offset addr for each
+ * stat. The periodic stats timer is started by i40iw_vsi_stats_init().
  */
-static void i40iw_hw_stat_init(struct i40iw_dev_pestat *devstat,
-			       u8 fcn_idx,
-			       struct i40iw_hw *hw, bool is_pf)
+void i40iw_hw_stats_init(struct i40iw_vsi_pestat *stats, u8 fcn_idx, bool is_pf)
 {
-	u32 stat_reg_offset;
-	u32 stat_index;
-	struct i40iw_dev_hw_stat_offsets *stat_table =
-		&devstat->hw_stat_offsets;
-	struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats;
-
-	devstat->hw = hw;
+	u32 stats_reg_offset;
+	u32 stats_index;
+	struct i40iw_dev_hw_stats_offsets *stats_table =
+		&stats->hw_stats_offsets;
+	struct i40iw_dev_hw_stats *last_rd_stats = &stats->last_read_hw_stats;
 
 	if (is_pf) {
-		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] =
+		stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] =
 				I40E_GLPES_PFIP4RXDISCARD(fcn_idx);
-		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] =
+		stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] =
 				I40E_GLPES_PFIP4RXTRUNC(fcn_idx);
-		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] =
+		stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] =
 				I40E_GLPES_PFIP4TXNOROUTE(fcn_idx);
-		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] =
+		stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] =
 				I40E_GLPES_PFIP6RXDISCARD(fcn_idx);
-		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] =
+		stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] =
 				I40E_GLPES_PFIP6RXTRUNC(fcn_idx);
-		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] =
+		stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] =
 				I40E_GLPES_PFIP6TXNOROUTE(fcn_idx);
-		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] =
+		stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] =
 				I40E_GLPES_PFTCPRTXSEG(fcn_idx);
-		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] =
+		stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] =
 				I40E_GLPES_PFTCPRXOPTERR(fcn_idx);
-		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] =
+		stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] =
 				I40E_GLPES_PFTCPRXPROTOERR(fcn_idx);
 
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] =
 				I40E_GLPES_PFIP4RXOCTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] =
 				I40E_GLPES_PFIP4RXPKTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] =
 				I40E_GLPES_PFIP4RXFRAGSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] =
 				I40E_GLPES_PFIP4RXMCPKTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] =
 				I40E_GLPES_PFIP4TXOCTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] =
 				I40E_GLPES_PFIP4TXPKTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] =
 				I40E_GLPES_PFIP4TXFRAGSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] =
 				I40E_GLPES_PFIP4TXMCPKTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] =
 				I40E_GLPES_PFIP6RXOCTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] =
 				I40E_GLPES_PFIP6RXPKTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] =
 				I40E_GLPES_PFIP6RXFRAGSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] =
 				I40E_GLPES_PFIP6RXMCPKTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] =
 				I40E_GLPES_PFIP6TXOCTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
 				I40E_GLPES_PFIP6TXPKTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
 				I40E_GLPES_PFIP6TXPKTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] =
 				I40E_GLPES_PFIP6TXFRAGSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] =
 				I40E_GLPES_PFTCPRXSEGSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] =
 				I40E_GLPES_PFTCPTXSEGLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] =
 				I40E_GLPES_PFRDMARXRDSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] =
 				I40E_GLPES_PFRDMARXSNDSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] =
 				I40E_GLPES_PFRDMARXWRSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] =
 				I40E_GLPES_PFRDMATXRDSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] =
 				I40E_GLPES_PFRDMATXSNDSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] =
 				I40E_GLPES_PFRDMATXWRSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] =
 				I40E_GLPES_PFRDMAVBNDLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] =
 				I40E_GLPES_PFRDMAVINVLO(fcn_idx);
 	} else {
-		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] =
+		stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] =
 				I40E_GLPES_VFIP4RXDISCARD(fcn_idx);
-		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] =
+		stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] =
 				I40E_GLPES_VFIP4RXTRUNC(fcn_idx);
-		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] =
+		stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] =
 				I40E_GLPES_VFIP4TXNOROUTE(fcn_idx);
-		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] =
+		stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] =
 				I40E_GLPES_VFIP6RXDISCARD(fcn_idx);
-		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] =
+		stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] =
 				I40E_GLPES_VFIP6RXTRUNC(fcn_idx);
-		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] =
+		stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] =
 				I40E_GLPES_VFIP6TXNOROUTE(fcn_idx);
-		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] =
+		stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] =
 				I40E_GLPES_VFTCPRTXSEG(fcn_idx);
-		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] =
+		stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] =
 				I40E_GLPES_VFTCPRXOPTERR(fcn_idx);
-		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] =
+		stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] =
 				I40E_GLPES_VFTCPRXPROTOERR(fcn_idx);
 
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] =
 				I40E_GLPES_VFIP4RXOCTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] =
 				I40E_GLPES_VFIP4RXPKTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] =
 				I40E_GLPES_VFIP4RXFRAGSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] =
 				I40E_GLPES_VFIP4RXMCPKTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] =
 				I40E_GLPES_VFIP4TXOCTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] =
 				I40E_GLPES_VFIP4TXPKTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] =
 				I40E_GLPES_VFIP4TXFRAGSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] =
 				I40E_GLPES_VFIP4TXMCPKTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] =
 				I40E_GLPES_VFIP6RXOCTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] =
 				I40E_GLPES_VFIP6RXPKTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] =
 				I40E_GLPES_VFIP6RXFRAGSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] =
 				I40E_GLPES_VFIP6RXMCPKTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] =
 				I40E_GLPES_VFIP6TXOCTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
 				I40E_GLPES_VFIP6TXPKTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
 				I40E_GLPES_VFIP6TXPKTSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] =
 				I40E_GLPES_VFIP6TXFRAGSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] =
 				I40E_GLPES_VFTCPRXSEGSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] =
 				I40E_GLPES_VFTCPTXSEGLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] =
 				I40E_GLPES_VFRDMARXRDSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] =
 				I40E_GLPES_VFRDMARXSNDSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] =
 				I40E_GLPES_VFRDMARXWRSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] =
 				I40E_GLPES_VFRDMATXRDSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] =
 				I40E_GLPES_VFRDMATXSNDSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] =
 				I40E_GLPES_VFRDMATXWRSLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] =
 				I40E_GLPES_VFRDMAVBNDLO(fcn_idx);
-		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] =
+		stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] =
 				I40E_GLPES_VFRDMAVINVLO(fcn_idx);
 	}
 
-	for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64;
-	     stat_index++) {
-		stat_reg_offset = stat_table->stat_offset_64[stat_index];
-		last_rd_stats->stat_value_64[stat_index] =
-			readq(devstat->hw->hw_addr + stat_reg_offset);
+	for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_64;
+	     stats_index++) {
+		stats_reg_offset = stats_table->stats_offset_64[stats_index];
+		last_rd_stats->stats_value_64[stats_index] =
+			readq(stats->hw->hw_addr + stats_reg_offset);
 	}
 
-	for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32;
-	     stat_index++) {
-		stat_reg_offset = stat_table->stat_offset_32[stat_index];
-		last_rd_stats->stat_value_32[stat_index] =
-			i40iw_rd32(devstat->hw, stat_reg_offset);
+	for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_32;
+	     stats_index++) {
+		stats_reg_offset = stats_table->stats_offset_32[stats_index];
+		last_rd_stats->stats_value_32[stats_index] =
+			i40iw_rd32(stats->hw, stats_reg_offset);
 	}
 }
 
 /**
- * i40iw_hw_stat_read_32 - Read 32-bit HW stat counters and accommodates for roll-overs.
- * @devstat: pestat struct
- * @index: index in HW stat table which contains offset reg-addr
- * @value: hw stat value
+ * i40iw_hw_stats_read_32 - Read 32-bit HW stats counters and accommodate roll-overs.
+ * @stats: pestat struct
+ * @index: index in HW stats table which contains offset reg-addr
+ * @value: hw stats value
  */
-static void i40iw_hw_stat_read_32(struct i40iw_dev_pestat *devstat,
-				  enum i40iw_hw_stat_index_32b index,
-				  u64 *value)
+void i40iw_hw_stats_read_32(struct i40iw_vsi_pestat *stats,
+			    enum i40iw_hw_stats_index_32b index,
+			    u64 *value)
 {
-	struct i40iw_dev_hw_stat_offsets *stat_table =
-		&devstat->hw_stat_offsets;
-	struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats;
-	struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
-	u64 new_stat_value = 0;
-	u32 stat_reg_offset = stat_table->stat_offset_32[index];
+	struct i40iw_dev_hw_stats_offsets *stats_table =
+		&stats->hw_stats_offsets;
+	struct i40iw_dev_hw_stats *last_rd_stats = &stats->last_read_hw_stats;
+	struct i40iw_dev_hw_stats *hw_stats = &stats->hw_stats;
+	u64 new_stats_value = 0;
+	u32 stats_reg_offset = stats_table->stats_offset_32[index];
 
-	new_stat_value = i40iw_rd32(devstat->hw, stat_reg_offset);
+	new_stats_value = i40iw_rd32(stats->hw, stats_reg_offset);
 	/*roll-over case */
-	if (new_stat_value < last_rd_stats->stat_value_32[index])
-		hw_stats->stat_value_32[index] += new_stat_value;
+	if (new_stats_value < last_rd_stats->stats_value_32[index])
+		hw_stats->stats_value_32[index] += new_stats_value;
 	else
-		hw_stats->stat_value_32[index] +=
-			new_stat_value - last_rd_stats->stat_value_32[index];
-	last_rd_stats->stat_value_32[index] = new_stat_value;
-	*value = hw_stats->stat_value_32[index];
+		hw_stats->stats_value_32[index] +=
+			new_stats_value - last_rd_stats->stats_value_32[index];
+	last_rd_stats->stats_value_32[index] = new_stats_value;
+	*value = hw_stats->stats_value_32[index];
 }
 
 /**
- * i40iw_hw_stat_read_64 - Read HW stat counters (greater than 32-bit) and accommodates for roll-overs.
- * @devstat: pestat struct
- * @index: index in HW stat table which contains offset reg-addr
- * @value: hw stat value
+ * i40iw_hw_stats_read_64 - Read HW stats counters (greater than 32-bit) and accommodate roll-overs.
+ * @stats: pestat struct
+ * @index: index in HW stats table which contains offset reg-addr
+ * @value: hw stats value
  */
-static void i40iw_hw_stat_read_64(struct i40iw_dev_pestat *devstat,
-				  enum i40iw_hw_stat_index_64b index,
-				  u64 *value)
+void i40iw_hw_stats_read_64(struct i40iw_vsi_pestat *stats,
+			    enum i40iw_hw_stats_index_64b index,
+			    u64 *value)
 {
-	struct i40iw_dev_hw_stat_offsets *stat_table =
-		&devstat->hw_stat_offsets;
-	struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats;
-	struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
-	u64 new_stat_value = 0;
-	u32 stat_reg_offset = stat_table->stat_offset_64[index];
+	struct i40iw_dev_hw_stats_offsets *stats_table =
+		&stats->hw_stats_offsets;
+	struct i40iw_dev_hw_stats *last_rd_stats = &stats->last_read_hw_stats;
+	struct i40iw_dev_hw_stats *hw_stats = &stats->hw_stats;
+	u64 new_stats_value = 0;
+	u32 stats_reg_offset = stats_table->stats_offset_64[index];
 
-	new_stat_value = readq(devstat->hw->hw_addr + stat_reg_offset);
+	new_stats_value = readq(stats->hw->hw_addr + stats_reg_offset);
 	/*roll-over case */
-	if (new_stat_value < last_rd_stats->stat_value_64[index])
-		hw_stats->stat_value_64[index] += new_stat_value;
+	if (new_stats_value < last_rd_stats->stats_value_64[index])
+		hw_stats->stats_value_64[index] += new_stats_value;
 	else
-		hw_stats->stat_value_64[index] +=
-			new_stat_value - last_rd_stats->stat_value_64[index];
-	last_rd_stats->stat_value_64[index] = new_stat_value;
-	*value = hw_stats->stat_value_64[index];
+		hw_stats->stats_value_64[index] +=
+			new_stats_value - last_rd_stats->stats_value_64[index];
+	last_rd_stats->stats_value_64[index] = new_stats_value;
+	*value = hw_stats->stats_value_64[index];
 }
 
 /**
- * i40iw_hw_stat_read_all - read all HW stat counters
- * @devstat: pestat struct
- * @stat_values: hw stats structure
+ * i40iw_hw_stats_read_all - read all HW stat counters
+ * @stats: pestat struct
+ * @stats_values: hw stats structure
  *
  * Read all the HW stat counters and populates hw_stats structure
- * of passed-in dev's pestat as well as copy created in stat_values.
+ * of passed-in vsi's pestat as well as copy created in stats_values.
  */
-static void i40iw_hw_stat_read_all(struct i40iw_dev_pestat *devstat,
-				   struct i40iw_dev_hw_stats *stat_values)
+void i40iw_hw_stats_read_all(struct i40iw_vsi_pestat *stats,
+			     struct i40iw_dev_hw_stats *stats_values)
 {
-	u32 stat_index;
+	u32 stats_index;
+	unsigned long flags;
 
-	for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32;
-	     stat_index++)
-		i40iw_hw_stat_read_32(devstat, stat_index,
-				      &stat_values->stat_value_32[stat_index]);
-	for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64;
-	     stat_index++)
-		i40iw_hw_stat_read_64(devstat, stat_index,
-				      &stat_values->stat_value_64[stat_index]);
+	spin_lock_irqsave(&stats->lock, flags);
+
+	for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_32;
+	     stats_index++)
+		i40iw_hw_stats_read_32(stats, stats_index,
+				       &stats_values->stats_value_32[stats_index]);
+	for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_64;
+	     stats_index++)
+		i40iw_hw_stats_read_64(stats, stats_index,
+				       &stats_values->stats_value_64[stats_index]);
+	spin_unlock_irqrestore(&stats->lock, flags);
 }
 
 /**
- * i40iw_hw_stat_refresh_all - Update all HW stat structs
- * @devstat: pestat struct
- * @stat_values: hw stats structure
+ * i40iw_hw_stats_refresh_all - Update all HW stats structs
+ * @stats: pestat struct
  *
- * Read all the HW stat counters to refresh values in hw_stats structure
+ * Read all the HW stats counters to refresh values in hw_stats structure
  * of passed-in dev's pestat
  */
-static void i40iw_hw_stat_refresh_all(struct i40iw_dev_pestat *devstat)
+void i40iw_hw_stats_refresh_all(struct i40iw_vsi_pestat *stats)
 {
-	u64 stat_value;
-	u32 stat_index;
+	u64 stats_value;
+	u32 stats_index;
+	unsigned long flags;
 
-	for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32;
-	     stat_index++)
-		i40iw_hw_stat_read_32(devstat, stat_index, &stat_value);
-	for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64;
-	     stat_index++)
-		i40iw_hw_stat_read_64(devstat, stat_index, &stat_value);
+	spin_lock_irqsave(&stats->lock, flags);
+
+	for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_32;
+	     stats_index++)
+		i40iw_hw_stats_read_32(stats, stats_index, &stats_value);
+	for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_64;
+	     stats_index++)
+		i40iw_hw_stats_read_64(stats, stats_index, &stats_value);
+	spin_unlock_irqrestore(&stats->lock, flags);
+}
+
+/**
+ * i40iw_get_fcn_id - Return the function id
+ * @dev: pointer to the device
+ */
+static u8 i40iw_get_fcn_id(struct i40iw_sc_dev *dev)
+{
+	u8 fcn_id = I40IW_INVALID_FCN_ID;
+	u8 i;
+
+	for (i = I40IW_FIRST_NON_PF_STAT; i < I40IW_MAX_STATS_COUNT; i++)
+		if (!dev->fcn_id_array[i]) {
+			fcn_id = i;
+			dev->fcn_id_array[i] = true;
+			break;
+		}
+	return fcn_id;
+}
+
+/**
+ * i40iw_vsi_stats_init - Initialize the vsi statistics
+ * @vsi: pointer to the vsi structure
+ * @info: The info structure used for initialization
+ */
+enum i40iw_status_code i40iw_vsi_stats_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_stats_info *info)
+{
+	u8 fcn_id = info->fcn_id;
+
+	if (info->alloc_fcn_id)
+		fcn_id = i40iw_get_fcn_id(vsi->dev);
+
+	if (fcn_id == I40IW_INVALID_FCN_ID)
+		return I40IW_ERR_NOT_READY;
+
+	vsi->pestat = info->pestat;
+	vsi->pestat->hw = vsi->dev->hw;
+
+	if (info->stats_initialize) {
+		i40iw_hw_stats_init(vsi->pestat, fcn_id, true);
+		spin_lock_init(&vsi->pestat->lock);
+		i40iw_hw_stats_start_timer(vsi);
+	}
+	vsi->stats_fcn_id_alloc = info->alloc_fcn_id;
+	vsi->fcn_id = fcn_id;
+	return I40IW_SUCCESS;
+}
+
+/**
+ * i40iw_vsi_stats_free - Free the vsi stats
+ * @vsi: pointer to the vsi structure
+ */
+void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi)
+{
+	u8 fcn_id = vsi->fcn_id;
+
+	if ((vsi->stats_fcn_id_alloc) && (fcn_id != I40IW_INVALID_FCN_ID))
+		vsi->dev->fcn_id_array[fcn_id] = false;
+	i40iw_hw_stats_stop_timer(vsi);
 }
 
 static struct i40iw_cqp_ops iw_cqp_ops = {
@@ -4711,24 +4943,6 @@
 	NULL
 };
 
-static const struct i40iw_device_pestat_ops iw_device_pestat_ops = {
-	i40iw_hw_stat_init,
-	i40iw_hw_stat_read_32,
-	i40iw_hw_stat_read_64,
-	i40iw_hw_stat_read_all,
-	i40iw_hw_stat_refresh_all
-};
-
-/**
- * i40iw_device_init_pestat - Initialize the pestat structure
- * @dev: pestat struct
- */
-enum i40iw_status_code i40iw_device_init_pestat(struct i40iw_dev_pestat *devstat)
-{
-	devstat->ops = iw_device_pestat_ops;
-	return 0;
-}
-
 /**
  * i40iw_device_init - Initialize IWARP device
  * @dev: IWARP device pointer
@@ -4750,14 +4964,7 @@
 
 	dev->debug_mask = info->debug_mask;
 
-	ret_code = i40iw_device_init_pestat(&dev->dev_pestat);
-	if (ret_code) {
-		i40iw_debug(dev, I40IW_DEBUG_DEV,
-			    "%s: i40iw_device_init_pestat failed\n", __func__);
-		return ret_code;
-	}
 	dev->hmc_fn_id = info->hmc_fn_id;
-	dev->qs_handle = info->qs_handle;
 	dev->exception_lan_queue = info->exception_lan_queue;
 	dev->is_pf = info->is_pf;
 
@@ -4770,15 +4977,10 @@
 	dev->hw = info->hw;
 	dev->hw->hw_addr = info->bar0;
 
-	val = i40iw_rd32(dev->hw, I40E_GLPCI_DREVID);
-	dev->hw_rev = (u8)RS_32(val, I40E_GLPCI_DREVID_DEFAULT_REVID);
-
 	if (dev->is_pf) {
-		dev->dev_pestat.ops.iw_hw_stat_init(&dev->dev_pestat,
-			dev->hmc_fn_id, dev->hw, true);
-		spin_lock_init(&dev->dev_pestat.stats_lock);
-		/*start the periodic stats_timer */
-		i40iw_hw_stats_start_timer(dev);
+		val = i40iw_rd32(dev->hw, I40E_GLPCI_DREVID);
+		dev->hw_rev = (u8)RS_32(val, I40E_GLPCI_DREVID_DEFAULT_REVID);
+
 		val = i40iw_rd32(dev->hw, I40E_GLPCI_LBARCTRL);
 		db_size = (u8)RS_32(val, I40E_GLPCI_LBARCTRL_PE_DB_SIZE);
 		if ((db_size != I40IW_PE_DB_SIZE_4M) &&
diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h
index 2fac1db..a39ac12 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_d.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_d.h
@@ -35,6 +35,8 @@
 #ifndef I40IW_D_H
 #define I40IW_D_H
 
+#define I40IW_FIRST_USER_QP_ID  2
+
 #define I40IW_DB_ADDR_OFFSET    (4 * 1024 * 1024 - 64 * 1024)
 #define I40IW_VF_DB_ADDR_OFFSET (64 * 1024)
 
@@ -67,6 +69,9 @@
 #define I40IW_STAG_TYPE_NONSHARED 1
 
 #define I40IW_MAX_USER_PRIORITY 8
+#define I40IW_MAX_STATS_COUNT 16
+#define I40IW_FIRST_NON_PF_STAT	4
+
 
 #define LS_64_1(val, bits)      ((u64)(uintptr_t)val << bits)
 #define RS_64_1(val, bits)      ((u64)(uintptr_t)val >> bits)
@@ -74,6 +79,8 @@
 #define RS_32_1(val, bits)      (u32)(val >> bits)
 #define I40E_HI_DWORD(x)        ((u32)((((x) >> 16) >> 16) & 0xFFFFFFFF))
 
+#define QS_HANDLE_UNKNOWN       0xffff
+
 #define LS_64(val, field) (((u64)val << field ## _SHIFT) & (field ## _MASK))
 
 #define RS_64(val, field) ((u64)(val & field ## _MASK) >> field ## _SHIFT)
@@ -1199,8 +1206,11 @@
 #define I40IWQPC_RXCQNUM_SHIFT 32
 #define I40IWQPC_RXCQNUM_MASK (0x1ffffULL << I40IWQPC_RXCQNUM_SHIFT)
 
-#define I40IWQPC_Q2ADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IWQPC_Q2ADDR_MASK I40IW_CQPHC_QPCTX_MASK
+#define I40IWQPC_STAT_INDEX_SHIFT 0
+#define I40IWQPC_STAT_INDEX_MASK (0x1fULL << I40IWQPC_STAT_INDEX_SHIFT)
+
+#define I40IWQPC_Q2ADDR_SHIFT 0
+#define I40IWQPC_Q2ADDR_MASK (0xffffffffffffff00ULL << I40IWQPC_Q2ADDR_SHIFT)
 
 #define I40IWQPC_LASTBYTESENT_SHIFT 0
 #define I40IWQPC_LASTBYTESENT_MASK (0xffUL << I40IWQPC_LASTBYTESENT_SHIFT)
@@ -1232,11 +1242,8 @@
 #define I40IWQPC_PRIVEN_SHIFT 25
 #define I40IWQPC_PRIVEN_MASK (1UL << I40IWQPC_PRIVEN_SHIFT)
 
-#define I40IWQPC_LSMMPRESENT_SHIFT 26
-#define I40IWQPC_LSMMPRESENT_MASK (1UL << I40IWQPC_LSMMPRESENT_SHIFT)
-
-#define I40IWQPC_ADJUSTFORLSMM_SHIFT 27
-#define I40IWQPC_ADJUSTFORLSMM_MASK (1UL << I40IWQPC_ADJUSTFORLSMM_SHIFT)
+#define I40IWQPC_USESTATSINSTANCE_SHIFT 26
+#define I40IWQPC_USESTATSINSTANCE_MASK (1UL << I40IWQPC_USESTATSINSTANCE_SHIFT)
 
 #define I40IWQPC_IWARPMODE_SHIFT 28
 #define I40IWQPC_IWARPMODE_MASK (1UL << I40IWQPC_IWARPMODE_SHIFT)
@@ -1713,6 +1720,8 @@
 #define OP_MANAGE_VF_PBLE_BP                    28
 #define OP_QUERY_FPM_VALUES                     29
 #define OP_COMMIT_FPM_VALUES                    30
-#define OP_SIZE_CQP_STAT_ARRAY                  31
+#define OP_REQUESTED_COMMANDS                   31
+#define OP_COMPLETED_COMMANDS                   32
+#define OP_SIZE_CQP_STAT_ARRAY                  33
 
 #endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
index 0c92a40..476867a 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -62,7 +62,7 @@
 	max_mr = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt;
 	arp_table_size = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_ARP].cnt;
 	iwdev->max_cqe = 0xFFFFF;
-	num_pds = max_qp * 4;
+	num_pds = I40IW_MAX_PDS;
 	resources_size = sizeof(struct i40iw_arp_entry) * arp_table_size;
 	resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_qp);
 	resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_mr);
@@ -308,7 +308,9 @@
 			iwqp = iwdev->qp_table[info->qp_cq_id];
 			if (!iwqp) {
 				spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
-				i40iw_pr_err("qp_id %d is already freed\n", info->qp_cq_id);
+				i40iw_debug(dev, I40IW_DEBUG_AEQ,
+					    "%s qp_id %d is already freed\n",
+					    __func__, info->qp_cq_id);
 				continue;
 			}
 			i40iw_add_ref(&iwqp->ibqp);
@@ -359,6 +361,9 @@
 				continue;
 			i40iw_cm_disconn(iwqp);
 			break;
+		case I40IW_AE_QP_SUSPEND_COMPLETE:
+			i40iw_qp_suspend_resume(dev, &iwqp->sc_qp, false);
+			break;
 		case I40IW_AE_TERMINATE_SENT:
 			i40iw_terminate_send_fin(qp);
 			break;
@@ -404,19 +409,18 @@
 		case I40IW_AE_LCE_CQ_CATASTROPHIC:
 		case I40IW_AE_UDA_XMIT_DGRAM_TOO_LONG:
 		case I40IW_AE_UDA_XMIT_IPADDR_MISMATCH:
-		case I40IW_AE_QP_SUSPEND_COMPLETE:
 			ctx_info->err_rq_idx_valid = false;
 		default:
-				if (!info->sq && ctx_info->err_rq_idx_valid) {
-					ctx_info->err_rq_idx = info->wqe_idx;
-					ctx_info->tcp_info_valid = false;
-					ctx_info->iwarp_info_valid = false;
-					ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
-									     iwqp->host_ctx.va,
-									     ctx_info);
-				}
-				i40iw_terminate_connection(qp, info);
-				break;
+			if (!info->sq && ctx_info->err_rq_idx_valid) {
+				ctx_info->err_rq_idx = info->wqe_idx;
+				ctx_info->tcp_info_valid = false;
+				ctx_info->iwarp_info_valid = false;
+				ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
+								     iwqp->host_ctx.va,
+								     ctx_info);
+			}
+			i40iw_terminate_connection(qp, info);
+			break;
 		}
 		if (info->qp)
 			i40iw_rem_ref(&iwqp->ibqp);
@@ -538,6 +542,7 @@
 {
 	struct i40iw_qhash_table_info *info;
 	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_sc_vsi *vsi = &iwdev->vsi;
 	enum i40iw_status_code status;
 	struct i40iw_cqp *iwcqp = &iwdev->cqp;
 	struct i40iw_cqp_request *cqp_request;
@@ -550,6 +555,7 @@
 	info = &cqp_info->in.u.manage_qhash_table_entry.info;
 	memset(info, 0, sizeof(*info));
 
+	info->vsi = &iwdev->vsi;
 	info->manage = mtype;
 	info->entry_type = etype;
 	if (cminfo->vlan_id != 0xFFFF) {
@@ -560,8 +566,9 @@
 	}
 
 	info->ipv4_valid = cminfo->ipv4;
+	info->user_pri = cminfo->user_pri;
 	ether_addr_copy(info->mac_addr, iwdev->netdev->dev_addr);
-	info->qp_num = cpu_to_le32(dev->ilq->qp_id);
+	info->qp_num = cpu_to_le32(vsi->ilq->qp_id);
 	info->dest_port = cpu_to_le16(cminfo->loc_port);
 	info->dest_ip[0] = cpu_to_le32(cminfo->loc_addr[0]);
 	info->dest_ip[1] = cpu_to_le32(cminfo->loc_addr[1]);
@@ -617,6 +624,7 @@
 	struct i40iw_qp_flush_info *hw_info;
 	struct i40iw_cqp_request *cqp_request;
 	struct cqp_commands_info *cqp_info;
+	struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp;
 
 	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait);
 	if (!cqp_request)
@@ -631,9 +639,30 @@
 	cqp_info->in.u.qp_flush_wqes.qp = qp;
 	cqp_info->in.u.qp_flush_wqes.scratch = (uintptr_t)cqp_request;
 	status = i40iw_handle_cqp_op(iwdev, cqp_request);
-	if (status)
+	if (status) {
 		i40iw_pr_err("CQP-OP Flush WQE's fail");
-	return status;
+		complete(&iwqp->sq_drained);
+		complete(&iwqp->rq_drained);
+		return status;
+	}
+	if (!cqp_request->compl_info.maj_err_code) {
+		switch (cqp_request->compl_info.min_err_code) {
+		case I40IW_CQP_COMPL_RQ_WQE_FLUSHED:
+			complete(&iwqp->sq_drained);
+			break;
+		case I40IW_CQP_COMPL_SQ_WQE_FLUSHED:
+			complete(&iwqp->rq_drained);
+			break;
+		case I40IW_CQP_COMPL_RQ_SQ_WQE_FLUSHED:
+			break;
+		default:
+			complete(&iwqp->sq_drained);
+			complete(&iwqp->rq_drained);
+			break;
+		}
+	}
+
+	return 0;
 }
 
 /**
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index ac2f3cd..2728af3 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -237,14 +237,11 @@
  */
 static void i40iw_destroy_cqp(struct i40iw_device *iwdev, bool free_hwcqp)
 {
-	enum i40iw_status_code status = 0;
 	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
 	struct i40iw_cqp *cqp = &iwdev->cqp;
 
-	if (free_hwcqp && dev->cqp_ops->cqp_destroy)
-		status = dev->cqp_ops->cqp_destroy(dev->cqp);
-	if (status)
-		i40iw_pr_err("destroy cqp failed");
+	if (free_hwcqp)
+		dev->cqp_ops->cqp_destroy(dev->cqp);
 
 	i40iw_free_dma_mem(dev->hw, &cqp->sq);
 	kfree(cqp->scratch_array);
@@ -270,6 +267,7 @@
 		i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_vec->idx - 1), 0);
 	else
 		i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_vec->idx - 1), 0);
+	irq_set_affinity_hint(msix_vec->irq, NULL);
 	free_irq(msix_vec->irq, dev_id);
 }
 
@@ -603,7 +601,7 @@
 		i40iw_pr_err("cqp init status %d\n", status);
 		goto exit;
 	}
-	status = dev->cqp_ops->cqp_create(dev->cqp, true, &maj_err, &min_err);
+	status = dev->cqp_ops->cqp_create(dev->cqp, &maj_err, &min_err);
 	if (status) {
 		i40iw_pr_err("cqp create status %d maj_err %d min_err %d\n",
 			     status, maj_err, min_err);
@@ -688,6 +686,7 @@
 							 struct i40iw_msix_vector *msix_vec)
 {
 	enum i40iw_status_code status;
+	cpumask_t mask;
 
 	if (iwdev->msix_shared && !ceq_id) {
 		tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
@@ -697,12 +696,15 @@
 		status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq);
 	}
 
+	cpumask_clear(&mask);
+	cpumask_set_cpu(msix_vec->cpu_affinity, &mask);
+	irq_set_affinity_hint(msix_vec->irq, &mask);
+
 	if (status) {
 		i40iw_pr_err("ceq irq config fail\n");
 		return I40IW_ERR_CONFIG;
 	}
 	msix_vec->ceq_id = ceq_id;
-	msix_vec->cpu_affinity = 0;
 
 	return 0;
 }
@@ -930,6 +932,7 @@
 	struct i40iw_puda_rsrc_info info;
 	enum i40iw_status_code status;
 
+	memset(&info, 0, sizeof(info));
 	info.type = I40IW_PUDA_RSRC_TYPE_ILQ;
 	info.cq_id = 1;
 	info.qp_id = 0;
@@ -939,10 +942,9 @@
 	info.rq_size = 8192;
 	info.buf_size = 1024;
 	info.tx_buf_cnt = 16384;
-	info.mss = iwdev->mss;
 	info.receive = i40iw_receive_ilq;
 	info.xmit_complete = i40iw_free_sqbuf;
-	status = i40iw_puda_create_rsrc(&iwdev->sc_dev, &info);
+	status = i40iw_puda_create_rsrc(&iwdev->vsi, &info);
 	if (status)
 		i40iw_pr_err("ilq create fail\n");
 	return status;
@@ -959,6 +961,7 @@
 	struct i40iw_puda_rsrc_info info;
 	enum i40iw_status_code status;
 
+	memset(&info, 0, sizeof(info));
 	info.type = I40IW_PUDA_RSRC_TYPE_IEQ;
 	info.cq_id = 2;
 	info.qp_id = iwdev->sc_dev.exception_lan_queue;
@@ -967,9 +970,8 @@
 	info.sq_size = 8192;
 	info.rq_size = 8192;
 	info.buf_size = 2048;
-	info.mss = iwdev->mss;
 	info.tx_buf_cnt = 16384;
-	status = i40iw_puda_create_rsrc(&iwdev->sc_dev, &info);
+	status = i40iw_puda_create_rsrc(&iwdev->vsi, &info);
 	if (status)
 		i40iw_pr_err("ieq create fail\n");
 	return status;
@@ -1159,7 +1161,7 @@
 {
 	struct net_device *ip_dev;
 	struct inet6_dev *idev;
-	struct inet6_ifaddr *ifp;
+	struct inet6_ifaddr *ifp, *tmp;
 	u32 local_ipaddr6[4];
 
 	rcu_read_lock();
@@ -1172,7 +1174,7 @@
 				i40iw_pr_err("ipv6 inet device not found\n");
 				break;
 			}
-			list_for_each_entry(ifp, &idev->addr_list, if_list) {
+			list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) {
 				i40iw_pr_info("IP=%pI6, vlan_id=%d, MAC=%pM\n", &ifp->addr,
 					      rdma_vlan_dev_vlan_id(ip_dev), ip_dev->dev_addr);
 				i40iw_copy_ip_ntohl(local_ipaddr6,
@@ -1294,17 +1296,23 @@
 	enum i40iw_status_code status;
 	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
 	struct i40iw_device_init_info info;
+	struct i40iw_vsi_init_info vsi_info;
 	struct i40iw_dma_mem mem;
+	struct i40iw_l2params l2params;
 	u32 size;
+	struct i40iw_vsi_stats_info stats_info;
+	u16 last_qset = I40IW_NO_QSET;
+	u16 qset;
+	u32 i;
 
+	memset(&l2params, 0, sizeof(l2params));
 	memset(&info, 0, sizeof(info));
 	size = sizeof(struct i40iw_hmc_pble_rsrc) + sizeof(struct i40iw_hmc_info) +
 				(sizeof(struct i40iw_hmc_obj_info) * I40IW_HMC_IW_MAX);
 	iwdev->hmc_info_mem = kzalloc(size, GFP_KERNEL);
-	if (!iwdev->hmc_info_mem) {
-		i40iw_pr_err("memory alloc fail\n");
+	if (!iwdev->hmc_info_mem)
 		return I40IW_ERR_NO_MEMORY;
-	}
+
 	iwdev->pble_rsrc = (struct i40iw_hmc_pble_rsrc *)iwdev->hmc_info_mem;
 	dev->hmc_info = &iwdev->hw.hmc;
 	dev->hmc_info->hmc_obj = (struct i40iw_hmc_obj_info *)(iwdev->pble_rsrc + 1);
@@ -1325,7 +1333,17 @@
 	info.bar0 = ldev->hw_addr;
 	info.hw = &iwdev->hw;
 	info.debug_mask = debug;
-	info.qs_handle = ldev->params.qos.prio_qos[0].qs_handle;
+	l2params.mss =
+		(ldev->params.mtu) ? ldev->params.mtu - I40IW_MTU_TO_MSS : I40IW_DEFAULT_MSS;
+	for (i = 0; i < I40E_CLIENT_MAX_USER_PRIORITY; i++) {
+		qset = ldev->params.qos.prio_qos[i].qs_handle;
+		l2params.qs_handle_list[i] = qset;
+		if (last_qset == I40IW_NO_QSET)
+			last_qset = qset;
+		else if ((qset != last_qset) && (qset != I40IW_NO_QSET))
+			iwdev->dcb = true;
+	}
+	i40iw_pr_info("DCB is set/clear = %d\n", iwdev->dcb);
 	info.exception_lan_queue = 1;
 	info.vchnl_send = i40iw_virtchnl_send;
 	status = i40iw_device_init(&iwdev->sc_dev, &info);
@@ -1334,6 +1352,20 @@
 		kfree(iwdev->hmc_info_mem);
 		iwdev->hmc_info_mem = NULL;
 	}
+	memset(&vsi_info, 0, sizeof(vsi_info));
+	vsi_info.dev = &iwdev->sc_dev;
+	vsi_info.back_vsi = (void *)iwdev;
+	vsi_info.params = &l2params;
+	i40iw_sc_vsi_init(&iwdev->vsi, &vsi_info);
+
+	if (dev->is_pf) {
+		memset(&stats_info, 0, sizeof(stats_info));
+		stats_info.fcn_id = ldev->fid;
+		stats_info.pestat = kzalloc(sizeof(*stats_info.pestat), GFP_KERNEL);
+		stats_info.stats_initialize = true;
+		if (stats_info.pestat)
+			i40iw_vsi_stats_init(&iwdev->vsi, &stats_info);
+	}
 	return status;
 }
 
@@ -1384,6 +1416,7 @@
 	for (i = 0, ceq_idx = 0; i < iwdev->msix_count; i++, iw_qvinfo++) {
 		iwdev->iw_msixtbl[i].idx = ldev->msix_entries[i].entry;
 		iwdev->iw_msixtbl[i].irq = ldev->msix_entries[i].vector;
+		iwdev->iw_msixtbl[i].cpu_affinity = ceq_idx;
 		if (i == 0) {
 			iw_qvinfo->aeq_idx = 0;
 			if (iwdev->msix_shared)
@@ -1404,18 +1437,19 @@
  * i40iw_deinit_device - clean up the device resources
  * @iwdev: iwarp device
  * @reset: true if called before reset
- * @del_hdl: true if delete hdl entry
  *
  * Destroy the ib device interface, remove the mac ip entry and ipv4/ipv6 addresses,
  * destroy the device queues and free the pble and the hmc objects
  */
-static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del_hdl)
+static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset)
 {
 	struct i40e_info *ldev = iwdev->ldev;
 
 	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
 
 	i40iw_pr_info("state = %d\n", iwdev->init_state);
+	if (iwdev->param_wq)
+		destroy_workqueue(iwdev->param_wq);
 
 	switch (iwdev->init_state) {
 	case RDMA_DEV_REGISTERED:
@@ -1441,10 +1475,10 @@
 		i40iw_destroy_aeq(iwdev, reset);
 		/* fallthrough */
 	case IEQ_CREATED:
-		i40iw_puda_dele_resources(dev, I40IW_PUDA_RSRC_TYPE_IEQ, reset);
+		i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_IEQ, reset);
 		/* fallthrough */
 	case ILQ_CREATED:
-		i40iw_puda_dele_resources(dev, I40IW_PUDA_RSRC_TYPE_ILQ, reset);
+		i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_ILQ, reset);
 		/* fallthrough */
 	case CCQ_CREATED:
 		i40iw_destroy_ccq(iwdev, reset);
@@ -1456,13 +1490,14 @@
 		i40iw_del_hmc_objects(dev, dev->hmc_info, true, reset);
 		/* fallthrough */
 	case CQP_CREATED:
-		i40iw_destroy_cqp(iwdev, !reset);
+		i40iw_destroy_cqp(iwdev, true);
 		/* fallthrough */
 	case INITIAL_STATE:
 		i40iw_cleanup_cm_core(&iwdev->cm_core);
-		if (dev->is_pf)
-			i40iw_hw_stats_del_timer(dev);
-
+		if (iwdev->vsi.pestat) {
+			i40iw_vsi_stats_free(&iwdev->vsi);
+			kfree(iwdev->vsi.pestat);
+		}
 		i40iw_del_init_mem(iwdev);
 		break;
 	case INVALID_STATE:
@@ -1472,8 +1507,7 @@
 		break;
 	}
 
-	if (del_hdl)
-		i40iw_del_handler(i40iw_find_i40e_handler(ldev));
+	i40iw_del_handler(i40iw_find_i40e_handler(ldev));
 	kfree(iwdev->hdl);
 }
 
@@ -1508,7 +1542,6 @@
 	iwdev->max_enabled_vfs = iwdev->max_rdma_vfs;
 	iwdev->netdev = ldev->netdev;
 	hdl->client = client;
-	iwdev->mss = (!ldev->params.mtu) ? I40IW_DEFAULT_MSS : ldev->params.mtu - I40IW_MTU_TO_MSS;
 	if (!ldev->ftype)
 		iwdev->db_start = pci_resource_start(ldev->pcidev, 0) + I40IW_DB_ADDR_OFFSET;
 	else
@@ -1528,6 +1561,7 @@
 
 	init_waitqueue_head(&iwdev->vchnl_waitq);
 	init_waitqueue_head(&dev->vf_reqs);
+	init_waitqueue_head(&iwdev->close_wq);
 
 	status = i40iw_initialize_dev(iwdev, ldev);
 exit:
@@ -1540,6 +1574,20 @@
 }
 
 /**
+ * i40iw_get_used_rsrc - determine resources used internally
+ * @iwdev: iwarp device
+ *
+ * Stores the index of the first clear bit of each pd/qp/cq/mr bitmap.
+ */
+static void i40iw_get_used_rsrc(struct i40iw_device *iwdev)
+{
+	iwdev->used_mrs = find_first_zero_bit(iwdev->allocated_mrs, iwdev->max_mr);
+	iwdev->used_cqs = find_first_zero_bit(iwdev->allocated_cqs, iwdev->max_cq);
+	iwdev->used_qps = find_first_zero_bit(iwdev->allocated_qps, iwdev->max_qp);
+	iwdev->used_pds = find_first_zero_bit(iwdev->allocated_pds, iwdev->max_pd);
+}
+
+/**
  * i40iw_open - client interface operation open for iwarp/uda device
  * @ldev: lan device information
  * @client: iwarp client information, provided during registration
@@ -1611,6 +1659,7 @@
 		status = i40iw_initialize_hw_resources(iwdev);
 		if (status)
 			break;
+		i40iw_get_used_rsrc(iwdev);
 		dev->ccq_ops->ccq_arm(dev->ccq);
 		status = i40iw_hmc_init_pble(&iwdev->sc_dev, iwdev->pble_rsrc);
 		if (status)
@@ -1630,35 +1679,73 @@
 		iwdev->init_state = RDMA_DEV_REGISTERED;
 		iwdev->iw_status = 1;
 		i40iw_port_ibevent(iwdev);
+		iwdev->param_wq = alloc_ordered_workqueue("l2params", WQ_MEM_RECLAIM);
+		if(iwdev->param_wq == NULL)
+			break;
 		i40iw_pr_info("i40iw_open completed\n");
 		return 0;
 	} while (0);
 
 	i40iw_pr_err("status = %d last completion = %d\n", status, iwdev->init_state);
-	i40iw_deinit_device(iwdev, false, false);
+	i40iw_deinit_device(iwdev, false);
 	return -ERESTART;
 }
 
 /**
- * i40iw_l2param_change : handle qs handles for qos and mss change
+ * i40iw_l2params_worker - worker for l2 params change
+ * @work: work pointer for l2 params
+ */
+static void i40iw_l2params_worker(struct work_struct *work)
+{
+	struct l2params_work *dwork =
+	    container_of(work, struct l2params_work, work);
+	struct i40iw_device *iwdev = dwork->iwdev;
+
+	i40iw_change_l2params(&iwdev->vsi, &dwork->l2params);
+	atomic_dec(&iwdev->params_busy);
+	kfree(dwork);	/* the allocation, not its embedded work */
+}
+
+/**
+ * i40iw_l2param_change - handle qs handles for qos and mss change
  * @ldev: lan device information
  * @client: client for paramater change
  * @params: new parameters from L2
  */
-static void i40iw_l2param_change(struct i40e_info *ldev,
-				 struct i40e_client *client,
+static void i40iw_l2param_change(struct i40e_info *ldev, struct i40e_client *client,
 				 struct i40e_params *params)
 {
 	struct i40iw_handler *hdl;
+	struct i40iw_l2params *l2params;
+	struct l2params_work *work;
 	struct i40iw_device *iwdev;
+	int i;
 
 	hdl = i40iw_find_i40e_handler(ldev);
 	if (!hdl)
 		return;
 
 	iwdev = &hdl->device;
-	if (params->mtu)
-		iwdev->mss = params->mtu - I40IW_MTU_TO_MSS;
+
+	if (atomic_read(&iwdev->params_busy))
+		return;
+
+
+	work = kzalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work)
+		return;
+
+	atomic_inc(&iwdev->params_busy);
+
+	work->iwdev = iwdev;
+	l2params = &work->l2params;
+	for (i = 0; i < I40E_CLIENT_MAX_USER_PRIORITY; i++)
+		l2params->qs_handle_list[i] = params->qos.prio_qos[i].qs_handle;
+
+	l2params->mss = (params->mtu) ? params->mtu - I40IW_MTU_TO_MSS : iwdev->vsi.mss;
+
+	INIT_WORK(&work->work, i40iw_l2params_worker);
+	queue_work(iwdev->param_wq, &work->work);
 }
 
 /**
@@ -1679,8 +1766,11 @@
 		return;
 
 	iwdev = &hdl->device;
+	iwdev->closing = true;
+
+	i40iw_cm_disconnect_all(iwdev);
 	destroy_workqueue(iwdev->virtchnl_wq);
-	i40iw_deinit_device(iwdev, reset, true);
+	i40iw_deinit_device(iwdev, reset);
 }
 
 /**
@@ -1701,21 +1791,23 @@
 	struct i40iw_vfdev *tmp_vfdev;
 	unsigned int i;
 	unsigned long flags;
+	struct i40iw_device *iwdev;
 
 	hdl = i40iw_find_i40e_handler(ldev);
 	if (!hdl)
 		return;
 
 	dev = &hdl->device.sc_dev;
+	iwdev = (struct i40iw_device *)dev->back_dev;
 
 	for (i = 0; i < I40IW_MAX_PE_ENABLED_VF_COUNT; i++) {
 		if (!dev->vf_dev[i] || (dev->vf_dev[i]->vf_id != vf_id))
 			continue;
 		/* free all resources allocated on behalf of vf */
 		tmp_vfdev = dev->vf_dev[i];
-		spin_lock_irqsave(&dev->dev_pestat.stats_lock, flags);
+		spin_lock_irqsave(&iwdev->vsi.pestat->lock, flags);
 		dev->vf_dev[i] = NULL;
-		spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags);
+		spin_unlock_irqrestore(&iwdev->vsi.pestat->lock, flags);
 		i40iw_del_hmc_objects(dev, &tmp_vfdev->hmc_info, false, false);
 		/* remove vf hmc function */
 		memset(&hmc_fcn_info, 0, sizeof(hmc_fcn_info));
diff --git a/drivers/infiniband/hw/i40iw/i40iw_osdep.h b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
index 80f422b..aa66c1c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_osdep.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
@@ -198,6 +198,8 @@
 void i40iw_cqp_spawn_worker(struct i40iw_sc_dev *dev,
 			    struct i40iw_virtchnl_work_info *work_info, u32 iw_vf_idx);
 void *i40iw_remove_head(struct list_head *list);
+void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, bool suspend);
+void i40iw_qp_mss_modify(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
 
 void i40iw_term_modify_qp(struct i40iw_sc_qp *qp, u8 next_state, u8 term, u8 term_len);
 void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred);
@@ -207,9 +209,9 @@
 enum i40iw_status_code i40iw_hw_manage_vf_pble_bp(struct i40iw_device *iwdev,
 						  struct i40iw_manage_vf_pble_info *info,
 						  bool wait);
-struct i40iw_dev_pestat;
-void i40iw_hw_stats_start_timer(struct i40iw_sc_dev *);
-void i40iw_hw_stats_del_timer(struct i40iw_sc_dev *);
+struct i40iw_sc_vsi;
+void i40iw_hw_stats_start_timer(struct i40iw_sc_vsi *vsi);
+void i40iw_hw_stats_stop_timer(struct i40iw_sc_vsi *vsi);
 #define i40iw_mmiowb() mmiowb()
 void i40iw_wr32(struct i40iw_hw *hw, u32 reg, u32 value);
 u32  i40iw_rd32(struct i40iw_hw *hw, u32 reg);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_p.h b/drivers/infiniband/hw/i40iw/i40iw_p.h
index a0b8ca1..28a92fe 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_p.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_p.h
@@ -47,8 +47,6 @@
 enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev,
 					 struct i40iw_device_init_info *info);
 
-enum i40iw_status_code i40iw_device_init_pestat(struct i40iw_dev_pestat *);
-
 void i40iw_sc_cqp_post_sq(struct i40iw_sc_cqp *cqp);
 
 u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch);
@@ -64,7 +62,24 @@
 enum i40iw_status_code i40iw_pf_init_vfhmc(struct i40iw_sc_dev *dev, u8 vf_hmc_fn_id,
 					   u32 *vf_cnt_array);
 
-/* cqp misc functions */
+/* stats functions */
+void i40iw_hw_stats_refresh_all(struct i40iw_vsi_pestat *stats);
+void i40iw_hw_stats_read_all(struct i40iw_vsi_pestat *stats, struct i40iw_dev_hw_stats *stats_values);
+void i40iw_hw_stats_read_32(struct i40iw_vsi_pestat *stats,
+			    enum i40iw_hw_stats_index_32b index,
+			    u64 *value);
+void i40iw_hw_stats_read_64(struct i40iw_vsi_pestat *stats,
+			    enum i40iw_hw_stats_index_64b index,
+			    u64 *value);
+void i40iw_hw_stats_init(struct i40iw_vsi_pestat *stats, u8 index, bool is_pf);
+
+/* vsi misc functions */
+enum i40iw_status_code i40iw_vsi_stats_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_stats_info *info);
+void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi);
+void i40iw_sc_vsi_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_init_info *info);
+
+void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2params);
+void i40iw_qp_add_qos(struct i40iw_sc_qp *qp);
 
 void i40iw_terminate_send_fin(struct i40iw_sc_qp *qp);
 
diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c
index 85993dc..c87ba16 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_pble.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_pble.c
@@ -353,10 +353,6 @@
 	pages = (idx->rel_pd_idx) ? (I40IW_HMC_PD_CNT_IN_SD -
 			idx->rel_pd_idx) : I40IW_HMC_PD_CNT_IN_SD;
 	pages = min(pages, pble_rsrc->unallocated_pble >> PBLE_512_SHIFT);
-	if (!pages) {
-		ret_code = I40IW_ERR_NO_PBLCHUNKS_AVAILABLE;
-		goto error;
-	}
 	info.chunk = chunk;
 	info.hmc_info = hmc_info;
 	info.pages = pages;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c
index c62d354..449ba8c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c
@@ -42,12 +42,13 @@
 #include "i40iw_p.h"
 #include "i40iw_puda.h"
 
-static void i40iw_ieq_receive(struct i40iw_sc_dev *dev,
+static void i40iw_ieq_receive(struct i40iw_sc_vsi *vsi,
 			      struct i40iw_puda_buf *buf);
-static void i40iw_ieq_tx_compl(struct i40iw_sc_dev *dev, void *sqwrid);
+static void i40iw_ieq_tx_compl(struct i40iw_sc_vsi *vsi, void *sqwrid);
 static void i40iw_ilq_putback_rcvbuf(struct i40iw_sc_qp *qp, u32 wqe_idx);
 static enum i40iw_status_code i40iw_puda_replenish_rq(struct i40iw_puda_rsrc
 						      *rsrc, bool initial);
+static void i40iw_ieq_cleanup_qp(struct i40iw_puda_rsrc *ieq, struct i40iw_sc_qp *qp);
 /**
  * i40iw_puda_get_listbuf - get buffer from puda list
  * @list: list to use for buffers (ILQ or IEQ)
@@ -292,7 +293,7 @@
 	unsigned long	flags;
 
 	if ((cq_type == I40IW_CQ_TYPE_ILQ) || (cq_type == I40IW_CQ_TYPE_IEQ)) {
-		rsrc = (cq_type == I40IW_CQ_TYPE_ILQ) ? dev->ilq : dev->ieq;
+		rsrc = (cq_type == I40IW_CQ_TYPE_ILQ) ? cq->vsi->ilq : cq->vsi->ieq;
 	} else {
 		i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s qp_type error\n", __func__);
 		return I40IW_ERR_BAD_PTR;
@@ -335,7 +336,7 @@
 		rsrc->stats_pkt_rcvd++;
 		rsrc->compl_rxwqe_idx = info.wqe_idx;
 		i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s RQ completion\n", __func__);
-		rsrc->receive(rsrc->dev, buf);
+		rsrc->receive(rsrc->vsi, buf);
 		if (cq_type == I40IW_CQ_TYPE_ILQ)
 			i40iw_ilq_putback_rcvbuf(&rsrc->qp, info.wqe_idx);
 		else
@@ -345,12 +346,12 @@
 		i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s SQ completion\n", __func__);
 		sqwrid = (void *)(uintptr_t)qp->sq_wrtrk_array[info.wqe_idx].wrid;
 		I40IW_RING_SET_TAIL(qp->sq_ring, info.wqe_idx);
-		rsrc->xmit_complete(rsrc->dev, sqwrid);
+		rsrc->xmit_complete(rsrc->vsi, sqwrid);
 		spin_lock_irqsave(&rsrc->bufpool_lock, flags);
 		rsrc->tx_wqe_avail_cnt++;
 		spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
-		if (!list_empty(&dev->ilq->txpend))
-			i40iw_puda_send_buf(dev->ilq, NULL);
+		if (!list_empty(&rsrc->vsi->ilq->txpend))
+			i40iw_puda_send_buf(rsrc->vsi->ilq, NULL);
 	}
 
 done:
@@ -513,10 +514,8 @@
  * i40iw_puda_qp_wqe - setup wqe for qp create
  * @rsrc: resource for qp
  */
-static enum i40iw_status_code i40iw_puda_qp_wqe(struct i40iw_puda_rsrc *rsrc)
+static enum i40iw_status_code i40iw_puda_qp_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
 {
-	struct i40iw_sc_qp *qp = &rsrc->qp;
-	struct i40iw_sc_dev *dev = rsrc->dev;
 	struct i40iw_sc_cqp *cqp;
 	u64 *wqe;
 	u64 header;
@@ -582,6 +581,7 @@
 	qp->back_qp = (void *)rsrc;
 	qp->sq_pa = mem->pa;
 	qp->rq_pa = qp->sq_pa + sq_size;
+	qp->vsi = rsrc->vsi;
 	ukqp->sq_base = mem->va;
 	ukqp->rq_base = &ukqp->sq_base[rsrc->sq_size];
 	ukqp->shadow_area = ukqp->rq_base[rsrc->rq_size].elem;
@@ -608,15 +608,63 @@
 		ukqp->wqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(qp->pd->dev) +
 						    I40E_VFPE_WQEALLOC1);
 
-	qp->qs_handle = qp->dev->qs_handle;
+	qp->user_pri = 0;
+	i40iw_qp_add_qos(qp);
 	i40iw_puda_qp_setctx(rsrc);
-	ret = i40iw_puda_qp_wqe(rsrc);
+	if (rsrc->ceq_valid)
+		ret = i40iw_cqp_qp_create_cmd(rsrc->dev, qp);
+	else
+		ret = i40iw_puda_qp_wqe(rsrc->dev, qp);
 	if (ret)
 		i40iw_free_dma_mem(rsrc->dev->hw, &rsrc->qpmem);
 	return ret;
 }
 
 /**
+ * i40iw_puda_cq_wqe - build and post the cqp wqe that creates a puda cq
+ * @dev: iwarp device whose cqp carries the request
+ * @cq: cq to create; its size, id and addresses fill the wqe
+ *
+ * Returns I40IW_ERR_RING_FULL when no cqp wqe is available, otherwise the
+ * status of polling the cqp for the create-cq completion.
+ */
+static enum i40iw_status_code i40iw_puda_cq_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq)
+{
+	struct i40iw_sc_cqp *cqp = dev->cqp;
+	struct i40iw_ccq_cqe_info compl_info;
+	u64 hdr;
+	u64 *wqe;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, 0);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
+	set_64bit_val(wqe, 8, RS_64_1(cq, 1));
+	set_64bit_val(wqe, 16, LS_64(cq->shadow_read_threshold,
+				     I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
+	set_64bit_val(wqe, 32, cq->cq_pa);
+	set_64bit_val(wqe, 40, cq->shadow_area_pa);
+
+	/* the header, which holds the WQEVALID field, is stored last */
+	hdr = cq->cq_uk.cq_id |
+	      LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) |
+	      LS_64(1, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
+	      LS_64(1, I40IW_CQPSQ_CQ_ENCEQEMASK) |
+	      LS_64(1, I40IW_CQPSQ_CQ_CEQIDVALID) |
+	      LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+	set_64bit_val(wqe, 24, hdr);
+
+	i40iw_debug_buf(dev, I40IW_DEBUG_PUDA, "PUDA CQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	i40iw_sc_cqp_post_sq(cqp);
+	return dev->cqp_ops->poll_for_cqp_op_done(cqp,
+						  I40IW_CQP_OP_CREATE_CQ,
+						  &compl_info);
+}
+
+/**
  * i40iw_puda_cq_create - create cq for resource
  * @rsrc: resource for which cq to create
  */
@@ -624,18 +672,13 @@
 {
 	struct i40iw_sc_dev *dev = rsrc->dev;
 	struct i40iw_sc_cq *cq = &rsrc->cq;
-	u64 *wqe;
-	struct i40iw_sc_cqp *cqp;
-	u64 header;
 	enum i40iw_status_code ret = 0;
 	u32 tsize, cqsize;
-	u32 shadow_read_threshold = 128;
 	struct i40iw_dma_mem *mem;
-	struct i40iw_ccq_cqe_info compl_info;
 	struct i40iw_cq_init_info info;
 	struct i40iw_cq_uk_init_info *init_info = &info.cq_uk_init_info;
 
-	cq->back_cq = (void *)rsrc;
+	cq->vsi = rsrc->vsi;
 	cqsize = rsrc->cq_size * (sizeof(struct i40iw_cqe));
 	tsize = cqsize + sizeof(struct i40iw_cq_shadow_area);
 	ret = i40iw_allocate_dma_mem(dev->hw, &rsrc->cqmem, tsize,
@@ -656,39 +699,15 @@
 	init_info->shadow_area = (u64 *)((u8 *)mem->va + cqsize);
 	init_info->cq_size = rsrc->cq_size;
 	init_info->cq_id = rsrc->cq_id;
+	info.ceqe_mask = true;
+	info.ceq_id_valid = true;
 	ret = dev->iw_priv_cq_ops->cq_init(cq, &info);
 	if (ret)
 		goto error;
-	cqp = dev->cqp;
-	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, 0);
-	if (!wqe) {
-		ret = I40IW_ERR_RING_FULL;
-		goto error;
-	}
-
-	set_64bit_val(wqe, 0, rsrc->cq_size);
-	set_64bit_val(wqe, 8, RS_64_1(cq, 1));
-	set_64bit_val(wqe, 16, LS_64(shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
-	set_64bit_val(wqe, 32, cq->cq_pa);
-
-	set_64bit_val(wqe, 40, cq->shadow_area_pa);
-
-	header = rsrc->cq_id |
-	    LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) |
-	    LS_64(1, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
-	    LS_64(1, I40IW_CQPSQ_CQ_ENCEQEMASK) |
-	    LS_64(1, I40IW_CQPSQ_CQ_CEQIDVALID) |
-	    LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-	set_64bit_val(wqe, 24, header);
-
-	i40iw_debug_buf(dev, I40IW_DEBUG_PUDA, "PUDA CQE",
-			wqe, I40IW_CQP_WQE_SIZE * 8);
-
-	i40iw_sc_cqp_post_sq(dev->cqp);
-	ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
-						 I40IW_CQP_OP_CREATE_CQ,
-						 &compl_info);
-
+	if (rsrc->ceq_valid)
+		ret = i40iw_cqp_cq_create_cmd(dev, cq);
+	else
+		ret = i40iw_puda_cq_wqe(dev, cq);
 error:
 	if (ret)
 		i40iw_free_dma_mem(dev->hw, &rsrc->cqmem);
@@ -696,30 +715,94 @@
 }
 
 /**
+ * i40iw_puda_free_qp - destroy the qp of a puda resource
+ * @rsrc: resource whose qp is destroyed
+ *
+ * Without rsrc->ceq_valid the destroy is issued and its completion polled
+ * here; errors on that path are only reported through i40iw_debug.
+ */
+static void i40iw_puda_free_qp(struct i40iw_puda_rsrc *rsrc)
+{
+	struct i40iw_sc_dev *dev = rsrc->dev;
+	struct i40iw_ccq_cqe_info compl_info;
+	enum i40iw_status_code rc;
+
+	if (rsrc->ceq_valid) {
+		i40iw_cqp_qp_destroy_cmd(dev, &rsrc->qp);
+		return;
+	}
+
+	rc = dev->iw_priv_qp_ops->qp_destroy(&rsrc->qp, 0, false, true, true);
+	if (rc) {
+		i40iw_debug(dev, I40IW_DEBUG_PUDA,
+			    "%s error puda qp destroy wqe\n", __func__);
+		return;
+	}
+
+	rc = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
+						I40IW_CQP_OP_DESTROY_QP,
+						&compl_info);
+	if (rc)
+		i40iw_debug(dev, I40IW_DEBUG_PUDA,
+			    "%s error puda qp destroy failed\n", __func__);
+}
+
+/**
+ * i40iw_puda_free_cq - destroy the cq of a puda resource
+ * @rsrc: resource whose cq is destroyed
+ *
+ * Used for both ILQ and IEQ resources. Without rsrc->ceq_valid the destroy
+ * is issued and polled here; errors are only reported through i40iw_debug.
+ */
+static void i40iw_puda_free_cq(struct i40iw_puda_rsrc *rsrc)
+{
+	struct i40iw_sc_dev *dev = rsrc->dev;
+	struct i40iw_ccq_cqe_info compl_info;
+	enum i40iw_status_code ret;
+
+	if (rsrc->ceq_valid) {
+		i40iw_cqp_cq_destroy_cmd(dev, &rsrc->cq);
+		return;
+	}
+	ret = dev->iw_priv_cq_ops->cq_destroy(&rsrc->cq, 0, true);
+	if (ret) {
+		i40iw_debug(dev, I40IW_DEBUG_PUDA,
+			    "%s error puda cq destroy wqe\n", __func__);
+		return;
+	}
+
+	ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
+						 I40IW_CQP_OP_DESTROY_CQ,
+						 &compl_info);
+	if (ret)
+		i40iw_debug(dev, I40IW_DEBUG_PUDA,
+			    "%s error puda cq destroy failed\n", __func__);
+}
+
+/**
  * i40iw_puda_dele_resources - delete all resources during close
  * @dev: iwarp device
  * @type: type of resource to dele
  * @reset: true if reset chip
  */
-void i40iw_puda_dele_resources(struct i40iw_sc_dev *dev,
+void i40iw_puda_dele_resources(struct i40iw_sc_vsi *vsi,
 			       enum puda_resource_type type,
 			       bool reset)
 {
-	struct i40iw_ccq_cqe_info compl_info;
+	struct i40iw_sc_dev *dev = vsi->dev;
 	struct i40iw_puda_rsrc *rsrc;
 	struct i40iw_puda_buf *buf = NULL;
 	struct i40iw_puda_buf *nextbuf = NULL;
 	struct i40iw_virt_mem *vmem;
-	enum i40iw_status_code ret;
 
 	switch (type) {
 	case I40IW_PUDA_RSRC_TYPE_ILQ:
-		rsrc = dev->ilq;
-		vmem = &dev->ilq_mem;
+		rsrc = vsi->ilq;
+		vmem = &vsi->ilq_mem;
 		break;
 	case I40IW_PUDA_RSRC_TYPE_IEQ:
-		rsrc = dev->ieq;
-		vmem = &dev->ieq_mem;
+		rsrc = vsi->ieq;
+		vmem = &vsi->ieq_mem;
 		break;
 	default:
 		i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s: error resource type = 0x%x\n",
@@ -731,45 +814,14 @@
 	case PUDA_HASH_CRC_COMPLETE:
 		i40iw_free_hash_desc(rsrc->hash_desc);
 	case PUDA_QP_CREATED:
-		do {
-			if (reset)
-				break;
-			ret = dev->iw_priv_qp_ops->qp_destroy(&rsrc->qp,
-							      0, false, true, true);
-			if (ret)
-				i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
-					    "%s error ieq qp destroy\n",
-					    __func__);
-
-			ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
-								 I40IW_CQP_OP_DESTROY_QP,
-								 &compl_info);
-			if (ret)
-				i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
-					    "%s error ieq qp destroy done\n",
-					    __func__);
-		} while (0);
+		if (!reset)
+			i40iw_puda_free_qp(rsrc);
 
 		i40iw_free_dma_mem(dev->hw, &rsrc->qpmem);
 		/* fallthrough */
 	case PUDA_CQ_CREATED:
-		do {
-			if (reset)
-				break;
-			ret = dev->iw_priv_cq_ops->cq_destroy(&rsrc->cq, 0, true);
-			if (ret)
-				i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
-					    "%s error ieq cq destroy\n",
-					    __func__);
-
-			ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
-								 I40IW_CQP_OP_DESTROY_CQ,
-								 &compl_info);
-			if (ret)
-				i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
-					    "%s error ieq qp destroy done\n",
-					    __func__);
-		} while (0);
+		if (!reset)
+			i40iw_puda_free_cq(rsrc);
 
 		i40iw_free_dma_mem(dev->hw, &rsrc->cqmem);
 		break;
@@ -825,9 +877,10 @@
  * @dev: iwarp device
  * @info: resource information
  */
-enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev,
+enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi,
 					      struct i40iw_puda_rsrc_info *info)
 {
+	struct i40iw_sc_dev *dev = vsi->dev;
 	enum i40iw_status_code ret = 0;
 	struct i40iw_puda_rsrc *rsrc;
 	u32 pudasize;
@@ -840,10 +893,10 @@
 	rqwridsize = info->rq_size * 8;
 	switch (info->type) {
 	case I40IW_PUDA_RSRC_TYPE_ILQ:
-		vmem = &dev->ilq_mem;
+		vmem = &vsi->ilq_mem;
 		break;
 	case I40IW_PUDA_RSRC_TYPE_IEQ:
-		vmem = &dev->ieq_mem;
+		vmem = &vsi->ieq_mem;
 		break;
 	default:
 		return I40IW_NOT_SUPPORTED;
@@ -856,22 +909,22 @@
 	rsrc = (struct i40iw_puda_rsrc *)vmem->va;
 	spin_lock_init(&rsrc->bufpool_lock);
 	if (info->type == I40IW_PUDA_RSRC_TYPE_ILQ) {
-		dev->ilq = (struct i40iw_puda_rsrc *)vmem->va;
-		dev->ilq_count = info->count;
+		vsi->ilq = (struct i40iw_puda_rsrc *)vmem->va;
+		vsi->ilq_count = info->count;
 		rsrc->receive = info->receive;
 		rsrc->xmit_complete = info->xmit_complete;
 	} else {
-		vmem = &dev->ieq_mem;
-		dev->ieq_count = info->count;
-		dev->ieq = (struct i40iw_puda_rsrc *)vmem->va;
+		vmem = &vsi->ieq_mem;
+		vsi->ieq_count = info->count;
+		vsi->ieq = (struct i40iw_puda_rsrc *)vmem->va;
 		rsrc->receive = i40iw_ieq_receive;
 		rsrc->xmit_complete = i40iw_ieq_tx_compl;
 	}
 
+	rsrc->ceq_valid = info->ceq_valid;
 	rsrc->type = info->type;
 	rsrc->sq_wrtrk_array = (struct i40iw_sq_uk_wr_trk_info *)((u8 *)vmem->va + pudasize);
 	rsrc->rq_wrid_array = (u64 *)((u8 *)vmem->va + pudasize + sqwridsize);
-	rsrc->mss = info->mss;
 	/* Initialize all ieq lists */
 	INIT_LIST_HEAD(&rsrc->bufpool);
 	INIT_LIST_HEAD(&rsrc->txpend);
@@ -885,6 +938,7 @@
 	rsrc->cq_size = info->rq_size + info->sq_size;
 	rsrc->buf_size = info->buf_size;
 	rsrc->dev = dev;
+	rsrc->vsi = vsi;
 
 	ret = i40iw_puda_cq_create(rsrc);
 	if (!ret) {
@@ -919,7 +973,7 @@
 	dev->ccq_ops->ccq_arm(&rsrc->cq);
 	return ret;
  error:
-	i40iw_puda_dele_resources(dev, info->type, false);
+	i40iw_puda_dele_resources(vsi, info->type, false);
 
 	return ret;
 }
@@ -1131,7 +1185,7 @@
 	list_add(&buf->list, &pbufl);
 
 	status = i40iw_ieq_create_pbufl(pfpdu, rxlist, &pbufl, buf, fpdu_len);
-	if (!status)
+	if (status)
 		goto error;
 
 	txbuf = i40iw_puda_get_bufpool(ieq);
@@ -1332,7 +1386,7 @@
 	}
 	if (pfpdu->mode && (fps != pfpdu->fps)) {
 		/* clean up qp as it is new partial sequence */
-		i40iw_ieq_cleanup_qp(ieq->dev, qp);
+		i40iw_ieq_cleanup_qp(ieq, qp);
 		i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
 			    "%s: restarting new partial\n", __func__);
 		pfpdu->mode = false;
@@ -1344,7 +1398,7 @@
 		pfpdu->rcv_nxt = fps;
 		pfpdu->fps = fps;
 		pfpdu->mode = true;
-		pfpdu->max_fpdu_data = ieq->mss;
+		pfpdu->max_fpdu_data = ieq->vsi->mss;
 		pfpdu->pmode_count++;
 		INIT_LIST_HEAD(rxlist);
 		i40iw_ieq_check_first_buf(buf, fps);
@@ -1379,14 +1433,14 @@
  * @dev: iwarp device
  * @buf: exception buffer received
  */
-static void i40iw_ieq_receive(struct i40iw_sc_dev *dev,
+static void i40iw_ieq_receive(struct i40iw_sc_vsi *vsi,
 			      struct i40iw_puda_buf *buf)
 {
-	struct i40iw_puda_rsrc *ieq = dev->ieq;
+	struct i40iw_puda_rsrc *ieq = vsi->ieq;
 	struct i40iw_sc_qp *qp = NULL;
 	u32 wqe_idx = ieq->compl_rxwqe_idx;
 
-	qp = i40iw_ieq_get_qp(dev, buf);
+	qp = i40iw_ieq_get_qp(vsi->dev, buf);
 	if (!qp) {
 		ieq->stats_bad_qp_id++;
 		i40iw_puda_ret_bufpool(ieq, buf);
@@ -1404,12 +1458,12 @@
 
 /**
  * i40iw_ieq_tx_compl - put back after sending completed exception buffer
- * @dev: iwarp device
+ * @vsi: pointer to the vsi structure
  * @sqwrid: pointer to puda buffer
  */
-static void i40iw_ieq_tx_compl(struct i40iw_sc_dev *dev, void *sqwrid)
+static void i40iw_ieq_tx_compl(struct i40iw_sc_vsi *vsi, void *sqwrid)
 {
-	struct i40iw_puda_rsrc *ieq = dev->ieq;
+	struct i40iw_puda_rsrc *ieq = vsi->ieq;
 	struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)sqwrid;
 
 	i40iw_puda_ret_bufpool(ieq, buf);
@@ -1421,15 +1475,14 @@
 
 /**
  * i40iw_ieq_cleanup_qp - qp is being destroyed
- * @dev: iwarp device
+ * @ieq: ieq resource
  * @qp: all pending fpdu buffers
  */
-void i40iw_ieq_cleanup_qp(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
+static void i40iw_ieq_cleanup_qp(struct i40iw_puda_rsrc *ieq, struct i40iw_sc_qp *qp)
 {
 	struct i40iw_puda_buf *buf;
 	struct i40iw_pfpdu *pfpdu = &qp->pfpdu;
 	struct list_head *rxlist = &pfpdu->rxlist;
-	struct i40iw_puda_rsrc *ieq = dev->ieq;
 
 	if (!pfpdu->mode)
 		return;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.h b/drivers/infiniband/hw/i40iw/i40iw_puda.h
index 52bf782..dba05ce 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.h
@@ -100,6 +100,7 @@
 	enum puda_resource_type type;	/* ILQ or IEQ */
 	u32 count;
 	u16 pd_id;
+	bool ceq_valid;
 	u32 cq_id;
 	u32 qp_id;
 	u32 sq_size;
@@ -107,8 +108,8 @@
 	u16 buf_size;
 	u16 mss;
 	u32 tx_buf_cnt;		/* total bufs allocated will be rq_size + tx_buf_cnt */
-	void (*receive)(struct i40iw_sc_dev *, struct i40iw_puda_buf *);
-	void (*xmit_complete)(struct i40iw_sc_dev *, void *);
+	void (*receive)(struct i40iw_sc_vsi *, struct i40iw_puda_buf *);
+	void (*xmit_complete)(struct i40iw_sc_vsi *, void *);
 };
 
 struct i40iw_puda_rsrc {
@@ -116,6 +117,7 @@
 	struct i40iw_sc_qp qp;
 	struct i40iw_sc_pd sc_pd;
 	struct i40iw_sc_dev *dev;
+	struct i40iw_sc_vsi *vsi;
 	struct i40iw_dma_mem cqmem;
 	struct i40iw_dma_mem qpmem;
 	struct i40iw_virt_mem ilq_mem;
@@ -123,6 +125,7 @@
 	enum puda_resource_type type;
 	u16 buf_size;		/*buffer must be max datalen + tcpip hdr + mac */
 	u16 mss;
+	bool ceq_valid;
 	u32 cq_id;
 	u32 qp_id;
 	u32 sq_size;
@@ -142,8 +145,8 @@
 	u32 avail_buf_count;		/* snapshot of currently available buffers */
 	spinlock_t bufpool_lock;
 	struct i40iw_puda_buf *alloclist;
-	void (*receive)(struct i40iw_sc_dev *, struct i40iw_puda_buf *);
-	void (*xmit_complete)(struct i40iw_sc_dev *, void *);
+	void (*receive)(struct i40iw_sc_vsi *, struct i40iw_puda_buf *);
+	void (*xmit_complete)(struct i40iw_sc_vsi *, void *);
 	/* puda stats */
 	u64 stats_buf_alloc_fail;
 	u64 stats_pkt_rcvd;
@@ -160,14 +163,13 @@
 			 struct i40iw_puda_buf *buf);
 enum i40iw_status_code i40iw_puda_send(struct i40iw_sc_qp *qp,
 				       struct i40iw_puda_send_info *info);
-enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev,
+enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi,
 					      struct i40iw_puda_rsrc_info *info);
-void i40iw_puda_dele_resources(struct i40iw_sc_dev *dev,
+void i40iw_puda_dele_resources(struct i40iw_sc_vsi *vsi,
 			       enum puda_resource_type type,
 			       bool reset);
 enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev,
 						  struct i40iw_sc_cq *cq, u32 *compl_err);
-void i40iw_ieq_cleanup_qp(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
 
 struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev,
 				     struct i40iw_puda_buf *buf);
@@ -180,4 +182,8 @@
 void i40iw_free_hash_desc(struct shash_desc *desc);
 void i40iw_ieq_update_tcpip_info(struct i40iw_puda_buf *buf, u16 length,
 				 u32 seqnum);
+enum i40iw_status_code i40iw_cqp_qp_create_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
+enum i40iw_status_code i40iw_cqp_cq_create_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq);
+void i40iw_cqp_qp_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
+void i40iw_cqp_cq_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq);
 #endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h
index 2b1a04e..f3f8e9c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_type.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_type.h
@@ -61,7 +61,7 @@
 
 struct i40iw_sc_dev;
 struct i40iw_hmc_info;
-struct i40iw_dev_pestat;
+struct i40iw_vsi_pestat;
 
 struct i40iw_cqp_ops;
 struct i40iw_ccq_ops;
@@ -74,6 +74,11 @@
 struct i40iw_priv_cq_ops;
 struct i40iw_hmc_ops;
 
+enum i40iw_page_size {
+	I40IW_PAGE_SIZE_4K,
+	I40IW_PAGE_SIZE_2M
+};
+
 enum i40iw_resource_indicator_type {
 	I40IW_RSRC_INDICATOR_TYPE_ADAPTER = 0,
 	I40IW_RSRC_INDICATOR_TYPE_CQ,
@@ -186,7 +191,7 @@
 	I40IW_DEBUG_ALL		= 0xFFFFFFFF
 };
 
-enum i40iw_hw_stat_index_32b {
+enum i40iw_hw_stats_index_32b {
 	I40IW_HW_STAT_INDEX_IP4RXDISCARD = 0,
 	I40IW_HW_STAT_INDEX_IP4RXTRUNC,
 	I40IW_HW_STAT_INDEX_IP4TXNOROUTE,
@@ -199,7 +204,7 @@
 	I40IW_HW_STAT_INDEX_MAX_32
 };
 
-enum i40iw_hw_stat_index_64b {
+enum i40iw_hw_stats_index_64b {
 	I40IW_HW_STAT_INDEX_IP4RXOCTS = 0,
 	I40IW_HW_STAT_INDEX_IP4RXPKTS,
 	I40IW_HW_STAT_INDEX_IP4RXFRAGS,
@@ -229,32 +234,23 @@
 	I40IW_HW_STAT_INDEX_MAX_64
 };
 
-struct i40iw_dev_hw_stat_offsets {
-	u32 stat_offset_32[I40IW_HW_STAT_INDEX_MAX_32];
-	u32 stat_offset_64[I40IW_HW_STAT_INDEX_MAX_64];
+struct i40iw_dev_hw_stats_offsets {
+	u32 stats_offset_32[I40IW_HW_STAT_INDEX_MAX_32];
+	u32 stats_offset_64[I40IW_HW_STAT_INDEX_MAX_64];
 };
 
 struct i40iw_dev_hw_stats {
-	u64 stat_value_32[I40IW_HW_STAT_INDEX_MAX_32];
-	u64 stat_value_64[I40IW_HW_STAT_INDEX_MAX_64];
+	u64 stats_value_32[I40IW_HW_STAT_INDEX_MAX_32];
+	u64 stats_value_64[I40IW_HW_STAT_INDEX_MAX_64];
 };
 
-struct i40iw_device_pestat_ops {
-	void (*iw_hw_stat_init)(struct i40iw_dev_pestat *, u8, struct i40iw_hw *, bool);
-	void (*iw_hw_stat_read_32)(struct i40iw_dev_pestat *, enum i40iw_hw_stat_index_32b, u64 *);
-	void (*iw_hw_stat_read_64)(struct i40iw_dev_pestat *, enum i40iw_hw_stat_index_64b, u64 *);
-	void (*iw_hw_stat_read_all)(struct i40iw_dev_pestat *, struct i40iw_dev_hw_stats *);
-	void (*iw_hw_stat_refresh_all)(struct i40iw_dev_pestat *);
-};
-
-struct i40iw_dev_pestat {
+struct i40iw_vsi_pestat {
 	struct i40iw_hw *hw;
-	struct i40iw_device_pestat_ops ops;
 	struct i40iw_dev_hw_stats hw_stats;
 	struct i40iw_dev_hw_stats last_read_hw_stats;
-	struct i40iw_dev_hw_stat_offsets hw_stat_offsets;
+	struct i40iw_dev_hw_stats_offsets hw_stats_offsets;
 	struct timer_list stats_timer;
-	spinlock_t stats_lock; /* rdma stats lock */
+	spinlock_t lock; /* rdma stats lock */
 };
 
 struct i40iw_hw {
@@ -350,6 +346,7 @@
 	u64 cq_pa;
 	u64 shadow_area_pa;
 	struct i40iw_sc_dev *dev;
+	struct i40iw_sc_vsi *vsi;
 	void *pbl_list;
 	void *back_cq;
 	u32 ceq_id;
@@ -373,6 +370,7 @@
 	u64 shadow_area_pa;
 	u64 q2_pa;
 	struct i40iw_sc_dev *dev;
+	struct i40iw_sc_vsi *vsi;
 	struct i40iw_sc_pd *pd;
 	u64 *hw_host_ctx;
 	void *llp_stream_handle;
@@ -397,6 +395,9 @@
 	bool virtual_map;
 	bool flush_sq;
 	bool flush_rq;
+	u8 user_pri;
+	struct list_head list;
+	bool on_qoslist;
 	bool sq_flush;
 	enum i40iw_flush_opcode flush_code;
 	enum i40iw_term_eventtypes eventtype;
@@ -424,10 +425,16 @@
 	char parm_buffer[I40IW_VCHNL_MAX_VF_MSG_SIZE - 1];
 };
 
+struct i40iw_qos {
+	struct list_head qplist;
+	spinlock_t lock;	/* qos list */
+	u16 qs_handle;
+};
+
 struct i40iw_vfdev {
 	struct i40iw_sc_dev *pf_dev;
 	u8 *hmc_info_mem;
-	struct i40iw_dev_pestat dev_pestat;
+	struct i40iw_vsi_pestat pestat;
 	struct i40iw_hmc_pble_info *pble_info;
 	struct i40iw_hmc_info hmc_info;
 	struct i40iw_vchnl_vf_msg_buffer vf_msg_buffer;
@@ -441,11 +448,28 @@
 	bool stats_initialized;
 };
 
+#define I40IW_INVALID_FCN_ID 0xff
+struct i40iw_sc_vsi {
+	struct i40iw_sc_dev *dev;
+	void *back_vsi; /* Owned by OS */
+	u32 ilq_count;
+	struct i40iw_virt_mem ilq_mem;
+	struct i40iw_puda_rsrc *ilq;
+	u32 ieq_count;
+	struct i40iw_virt_mem ieq_mem;
+	struct i40iw_puda_rsrc *ieq;
+	u16 mss;
+	u8 fcn_id;
+	bool stats_fcn_id_alloc;
+	struct i40iw_qos qos[I40IW_MAX_USER_PRIORITY];
+	struct i40iw_vsi_pestat *pestat;
+};
+
 struct i40iw_sc_dev {
 	struct list_head cqp_cmd_head;	/* head of the CQP command list */
 	spinlock_t cqp_lock; /* cqp list sync */
 	struct i40iw_dev_uk dev_uk;
-	struct i40iw_dev_pestat dev_pestat;
+	bool fcn_id_array[I40IW_MAX_STATS_COUNT];
 	struct i40iw_dma_mem vf_fpm_query_buf[I40IW_MAX_PE_ENABLED_VF_COUNT];
 	u64 fpm_query_buf_pa;
 	u64 fpm_commit_buf_pa;
@@ -472,17 +496,9 @@
 	struct i40iw_cqp_misc_ops *cqp_misc_ops;
 	struct i40iw_hmc_ops *hmc_ops;
 	struct i40iw_vchnl_if vchnl_if;
-	u32 ilq_count;
-	struct i40iw_virt_mem ilq_mem;
-	struct i40iw_puda_rsrc *ilq;
-	u32 ieq_count;
-	struct i40iw_virt_mem ieq_mem;
-	struct i40iw_puda_rsrc *ieq;
-
 	const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops;
 
 	struct i40iw_hmc_fpm_misc hmc_fpm_misc;
-	u16 qs_handle;
 	u32 debug_mask;
 	u16 exception_lan_queue;
 	u8 hmc_fn_id;
@@ -556,6 +572,19 @@
 	u16 mss;
 };
 
+struct i40iw_vsi_init_info {
+	struct i40iw_sc_dev *dev;
+	void  *back_vsi;
+	struct i40iw_l2params *params;
+};
+
+struct i40iw_vsi_stats_info {
+	struct i40iw_vsi_pestat *pestat;
+	u8 fcn_id;
+	bool alloc_fcn_id;
+	bool stats_initialize;
+};
+
 struct i40iw_device_init_info {
 	u64 fpm_query_buf_pa;
 	u64 fpm_commit_buf_pa;
@@ -564,7 +593,6 @@
 	struct i40iw_hw *hw;
 	void __iomem *bar0;
 	enum i40iw_status_code (*vchnl_send)(struct i40iw_sc_dev *, u32, u8 *, u16);
-	u16 qs_handle;
 	u16 exception_lan_queue;
 	u8 hmc_fn_id;
 	bool is_pf;
@@ -722,6 +750,8 @@
 	bool iwarp_info_valid;
 	bool err_rq_idx_valid;
 	u16 err_rq_idx;
+	bool add_to_qoslist;
+	u8 user_pri;
 };
 
 struct i40iw_aeqe_info {
@@ -814,6 +844,7 @@
 struct i40iw_qp_init_info {
 	struct i40iw_qp_uk_init_info qp_uk_init_info;
 	struct i40iw_sc_pd *pd;
+	struct i40iw_sc_vsi *vsi;
 	u64 *host_ctx;
 	u8 *q2;
 	u64 sq_pa;
@@ -880,13 +911,14 @@
 };
 
 struct i40iw_qhash_table_info {
+	struct i40iw_sc_vsi *vsi;
 	enum i40iw_quad_hash_manage_type manage;
 	enum i40iw_quad_entry_type entry_type;
 	bool vlan_valid;
 	bool ipv4_valid;
 	u8 mac_addr[6];
 	u16 vlan_id;
-	u16 qs_handle;
+	u8 user_pri;
 	u32 qp_num;
 	u32 dest_ip[4];
 	u32 src_ip[4];
@@ -976,7 +1008,7 @@
 struct i40iw_cqp_ops {
 	enum i40iw_status_code (*cqp_init)(struct i40iw_sc_cqp *,
 					   struct i40iw_cqp_init_info *);
-	enum i40iw_status_code (*cqp_create)(struct i40iw_sc_cqp *, bool, u16 *, u16 *);
+	enum i40iw_status_code (*cqp_create)(struct i40iw_sc_cqp *, u16 *, u16 *);
 	void (*cqp_post_sq)(struct i40iw_sc_cqp *);
 	u64 *(*cqp_get_next_send_wqe)(struct i40iw_sc_cqp *, u64 scratch);
 	enum i40iw_status_code (*cqp_destroy)(struct i40iw_sc_cqp *);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c
index 4d28c3c..4376cd6 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_uk.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c
@@ -175,12 +175,10 @@
 		if (!*wqe_idx)
 			qp->swqe_polarity = !qp->swqe_polarity;
 	}
-
-	for (i = 0; i < wqe_size / I40IW_QP_WQE_MIN_SIZE; i++) {
-		I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
-		if (ret_code)
-			return NULL;
-	}
+	I40IW_RING_MOVE_HEAD_BY_COUNT(qp->sq_ring,
+				      wqe_size / I40IW_QP_WQE_MIN_SIZE, ret_code);
+	if (ret_code)
+		return NULL;
 
 	wqe = qp->sq_base[*wqe_idx].elem;
 
@@ -430,7 +428,7 @@
 	struct i40iw_inline_rdma_write *op_info;
 	u64 *push;
 	u64 header = 0;
-	u32 i, wqe_idx;
+	u32 wqe_idx;
 	enum i40iw_status_code ret_code;
 	bool read_fence = false;
 	u8 wqe_size;
@@ -465,14 +463,12 @@
 	src = (u8 *)(op_info->data);
 
 	if (op_info->len <= 16) {
-		for (i = 0; i < op_info->len; i++, src++, dest++)
-			*dest = *src;
+		memcpy(dest, src, op_info->len);
 	} else {
-		for (i = 0; i < 16; i++, src++, dest++)
-			*dest = *src;
+		memcpy(dest, src, 16);
+		src += 16;
 		dest = (u8 *)wqe + 32;
-		for (; i < op_info->len; i++, src++, dest++)
-			*dest = *src;
+		memcpy(dest, src, op_info->len - 16);
 	}
 
 	wmb(); /* make sure WQE is populated before valid bit is set */
@@ -507,7 +503,7 @@
 	u8 *dest, *src;
 	struct i40iw_post_inline_send *op_info;
 	u64 header;
-	u32 wqe_idx, i;
+	u32 wqe_idx;
 	enum i40iw_status_code ret_code;
 	bool read_fence = false;
 	u8 wqe_size;
@@ -540,14 +536,12 @@
 	src = (u8 *)(op_info->data);
 
 	if (op_info->len <= 16) {
-		for (i = 0; i < op_info->len; i++, src++, dest++)
-			*dest = *src;
+		memcpy(dest, src, op_info->len);
 	} else {
-		for (i = 0; i < 16; i++, src++, dest++)
-			*dest = *src;
+		memcpy(dest, src, 16);
+		src += 16;
 		dest = (u8 *)wqe + 32;
-		for (; i < op_info->len; i++, src++, dest++)
-			*dest = *src;
+		memcpy(dest, src, op_info->len - 16);
 	}
 
 	wmb(); /* make sure WQE is populated before valid bit is set */
@@ -1190,12 +1184,8 @@
 
 	if (data_size <= 16)
 		*wqe_size = I40IW_QP_WQE_MIN_SIZE;
-	else if (data_size <= 48)
-		*wqe_size = 64;
-	else if (data_size <= 80)
-		*wqe_size = 96;
 	else
-		*wqe_size = 128;
+		*wqe_size = 64;
 
 	return 0;
 }
diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h
index 276bcef..80d9f46 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_user.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_user.h
@@ -72,12 +72,12 @@
 	I40IW_MAX_SQ_PAYLOAD_SIZE =		2145386496,
 	I40IW_MAX_INLINE_DATA_SIZE =		48,
 	I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE =	48,
-	I40IW_MAX_IRD_SIZE =			32,
-	I40IW_QPCTX_ENCD_MAXIRD =		3,
+	I40IW_MAX_IRD_SIZE =			63,
+	I40IW_MAX_ORD_SIZE =			127,
 	I40IW_MAX_WQ_ENTRIES =			2048,
-	I40IW_MAX_ORD_SIZE =			32,
 	I40IW_Q2_BUFFER_SIZE =			(248 + 100),
-	I40IW_QP_CTX_SIZE =			248
+	I40IW_QP_CTX_SIZE =			248,
+	I40IW_MAX_PDS = 			32768
 };
 
 #define i40iw_handle void *
@@ -96,12 +96,6 @@
 #define i40iw_physical_fragment u64
 #define i40iw_address_list u64 *
 
-#define I40IW_CREATE_STAG(index, key)       (((index) << 8) + (key))
-
-#define I40IW_STAG_KEY_FROM_STAG(stag)      ((stag) && 0x000000FF)
-
-#define I40IW_STAG_INDEX_FROM_STAG(stag)    (((stag) && 0xFFFFFF00) >> 8)
-
 #define	I40IW_MAX_MR_SIZE	0x10000000000L
 
 struct i40iw_qp_uk;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index 6fd043b..0f5d43d 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -153,6 +153,7 @@
 	struct i40iw_device *iwdev;
 	struct i40iw_handler *hdl;
 	u32 local_ipaddr;
+	u32 action = I40IW_ARP_ADD;
 
 	hdl = i40iw_find_netdev(event_netdev);
 	if (!hdl)
@@ -164,44 +165,25 @@
 	if (netdev != event_netdev)
 		return NOTIFY_DONE;
 
+	if (upper_dev)
+		local_ipaddr = ntohl(
+			((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
+	else
+		local_ipaddr = ntohl(ifa->ifa_address);
 	switch (event) {
 	case NETDEV_DOWN:
-		if (upper_dev)
-			local_ipaddr = ntohl(
-				((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
-		else
-			local_ipaddr = ntohl(ifa->ifa_address);
-		i40iw_manage_arp_cache(iwdev,
-				       netdev->dev_addr,
-				       &local_ipaddr,
-				       true,
-				       I40IW_ARP_DELETE);
-		return NOTIFY_OK;
+		action = I40IW_ARP_DELETE;
+		/* Fall through */
 	case NETDEV_UP:
-		if (upper_dev)
-			local_ipaddr = ntohl(
-				((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
-		else
-			local_ipaddr = ntohl(ifa->ifa_address);
-		i40iw_manage_arp_cache(iwdev,
-				       netdev->dev_addr,
-				       &local_ipaddr,
-				       true,
-				       I40IW_ARP_ADD);
-		break;
+		/* Fall through */
 	case NETDEV_CHANGEADDR:
-		/* Add the address to the IP table */
-		if (upper_dev)
-			local_ipaddr = ntohl(
-				((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
-		else
-			local_ipaddr = ntohl(ifa->ifa_address);
-
 		i40iw_manage_arp_cache(iwdev,
 				       netdev->dev_addr,
 				       &local_ipaddr,
 				       true,
-				       I40IW_ARP_ADD);
+				       action);
+		i40iw_if_notify(iwdev, netdev, &local_ipaddr, true,
+				(action == I40IW_ARP_ADD) ? true : false);
 		break;
 	default:
 		break;
@@ -225,6 +207,7 @@
 	struct i40iw_device *iwdev;
 	struct i40iw_handler *hdl;
 	u32 local_ipaddr6[4];
+	u32 action = I40IW_ARP_ADD;
 
 	hdl = i40iw_find_netdev(event_netdev);
 	if (!hdl)
@@ -235,24 +218,21 @@
 	if (netdev != event_netdev)
 		return NOTIFY_DONE;
 
+	i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32);
 	switch (event) {
 	case NETDEV_DOWN:
-		i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32);
-		i40iw_manage_arp_cache(iwdev,
-				       netdev->dev_addr,
-				       local_ipaddr6,
-				       false,
-				       I40IW_ARP_DELETE);
-		return NOTIFY_OK;
+		action = I40IW_ARP_DELETE;
+		/* Fall through */
 	case NETDEV_UP:
 		/* Fall through */
 	case NETDEV_CHANGEADDR:
-		i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32);
 		i40iw_manage_arp_cache(iwdev,
 				       netdev->dev_addr,
 				       local_ipaddr6,
 				       false,
-				       I40IW_ARP_ADD);
+				       action);
+		i40iw_if_notify(iwdev, netdev, local_ipaddr6, false,
+				(action == I40IW_ARP_ADD) ? true : false);
 		break;
 	default:
 		break;
@@ -392,6 +372,7 @@
 
 	i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
 	i40iw_free_qp_resources(iwdev, iwqp, qp_num);
+	i40iw_rem_devusecount(iwdev);
 }
 
 /**
@@ -415,7 +396,10 @@
 		i40iw_pr_err("error cqp command 0x%x timed out ret = %d\n",
 			     info->cqp_cmd, timeout_ret);
 		err_code = -ETIME;
-		i40iw_request_reset(iwdev);
+		if (!iwdev->reset) {
+			iwdev->reset = true;
+			i40iw_request_reset(iwdev);
+		}
 		goto done;
 	}
 	cqp_error = cqp_request->compl_info.error;
@@ -445,6 +429,11 @@
 	struct cqp_commands_info *info = &cqp_request->info;
 	int err_code = 0;
 
+	if (iwdev->reset) {
+		i40iw_free_cqp_request(&iwdev->cqp, cqp_request);
+		return I40IW_ERR_CQP_COMPL_ERROR;
+	}
+
 	status = i40iw_process_cqp_cmd(dev, info);
 	if (status) {
 		i40iw_pr_err("error cqp command 0x%x failed\n", info->cqp_cmd);
@@ -459,6 +448,26 @@
 }
 
 /**
+ * i40iw_add_devusecount - take one reference on the device
+ * @iwdev: device whose use_count is atomically incremented
+ */
+void i40iw_add_devusecount(struct i40iw_device *iwdev)
+{
+	atomic64_inc(&iwdev->use_count);
+}
+
+/**
+ * i40iw_rem_devusecount - drop one reference on the device
+ * @iwdev: device whose use_count is atomically decremented
+ */
+void i40iw_rem_devusecount(struct i40iw_device *iwdev)
+{
+	/* only the decrement that brings use_count to zero wakes close_wq */
+	if (atomic64_dec_and_test(&iwdev->use_count))
+		wake_up(&iwdev->close_wq);
+}
+
+/**
  * i40iw_add_pdusecount - add pd refcount
  * @iwpd: pd for refcount
  */
@@ -712,6 +721,51 @@
 }
 
 /**
+ * i40iw_qp_suspend_resume - issue a cqp suspend or resume command for a qp
+ * @dev: hardware control device structure
+ * @qp: hardware control qp
+ * @suspend: true issues OP_SUSPEND, false issues OP_RESUME
+ */
+void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, bool suspend)
+{
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+	struct i40iw_cqp_request *cqp_request;
+	struct i40iw_sc_cqp *cqp = dev->cqp;
+	struct cqp_commands_info *cqp_info;
+	enum i40iw_status_code status;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
+	if (!cqp_request)
+		return;	/* void function: nothing is reported to the caller */
+
+	cqp_info = &cqp_request->info;
+	cqp_info->cqp_cmd = suspend ? OP_SUSPEND : OP_RESUME;
+	cqp_info->in.u.suspend_resume.cqp = cqp;
+	cqp_info->in.u.suspend_resume.qp = qp;
+	cqp_info->in.u.suspend_resume.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)	/* failure is only logged */
+		i40iw_pr_err("CQP-OP QP Suspend/Resume fail\n");
+}
+
+/**
+ * i40iw_qp_mss_modify - request an mss change on a qp via modify-qp
+ * @dev: hardware control device structure
+ * @qp: hardware control qp; the new mss is read from qp->vsi->mss
+ */
+void i40iw_qp_mss_modify(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
+{
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+	struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp;
+	struct i40iw_modify_qp_info info;
+
+	memset(&info, 0, sizeof(info));	/* every other modify field stays zero */
+	info.mss_change = true;
+	info.new_mss = qp->vsi->mss;
+	i40iw_hw_modify_qp(iwdev, iwqp, &info, false);
+}
+
+/**
  * i40iw_term_modify_qp - modify qp for term message
  * @qp: hardware control qp
  * @next_state: qp's next state
@@ -769,6 +823,7 @@
 	struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)&iwqp->sc_qp;
 
 	i40iw_terminate_done(qp, 1);
+	i40iw_rem_ref(&iwqp->ibqp);
 }
 
 /**
@@ -780,6 +835,7 @@
 	struct i40iw_qp *iwqp;
 
 	iwqp = (struct i40iw_qp *)qp->back_qp;
+	i40iw_add_ref(&iwqp->ibqp);
 	init_timer(&iwqp->terminate_timer);
 	iwqp->terminate_timer.function = i40iw_terminate_timeout;
 	iwqp->terminate_timer.expires = jiffies + HZ;
@@ -796,7 +852,8 @@
 	struct i40iw_qp *iwqp;
 
 	iwqp = (struct i40iw_qp *)qp->back_qp;
-	del_timer(&iwqp->terminate_timer);
+	if (del_timer(&iwqp->terminate_timer))
+		i40iw_rem_ref(&iwqp->ibqp);
 }
 
 /**
@@ -1011,6 +1068,116 @@
 }
 
 /**
+ * i40iw_cqp_cq_create_cmd - issue an OP_CQ_CREATE command through the cqp
+ * @dev: device pointer
+ * @cq: cq to create
+ * Return: I40IW_ERR_NO_MEMORY if no cqp request, else i40iw_handle_cqp_op() status
+ */
+enum i40iw_status_code i40iw_cqp_cq_create_cmd(struct i40iw_sc_dev *dev,
+					       struct i40iw_sc_cq *cq)
+{
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+	struct i40iw_cqp *iwcqp = &iwdev->cqp;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	enum i40iw_status_code status;
+
+	cqp_request = i40iw_get_cqp_request(iwcqp, true);
+	if (!cqp_request)
+		return I40IW_ERR_NO_MEMORY;
+
+	cqp_info = &cqp_request->info;
+	cqp_info->cqp_cmd = OP_CQ_CREATE;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.cq_create.cq = cq;
+	cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)	/* this is the CQ path: name the CQ in the log */
+		i40iw_pr_err("CQP-OP Create CQ fail\n");
+	return status;
+}
+
+/**
+ * i40iw_cqp_qp_create_cmd - issue an OP_QP_CREATE command through the cqp
+ * @dev: device pointer
+ * @qp: qp to create
+ * Return: I40IW_ERR_NO_MEMORY if no cqp request, else i40iw_handle_cqp_op() status
+ */
+enum i40iw_status_code i40iw_cqp_qp_create_cmd(struct i40iw_sc_dev *dev,
+					       struct i40iw_sc_qp *qp)
+{
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+	struct i40iw_cqp *iwcqp = &iwdev->cqp;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	struct i40iw_create_qp_info *qp_info;
+	enum i40iw_status_code status;
+
+	cqp_request = i40iw_get_cqp_request(iwcqp, true);
+	if (!cqp_request)
+		return I40IW_ERR_NO_MEMORY;
+
+	cqp_info = &cqp_request->info;
+	qp_info = &cqp_request->info.in.u.qp_create.info;
+	memset(qp_info, 0, sizeof(*qp_info));
+
+	qp_info->cq_num_valid = true;
+	qp_info->next_iwarp_state = I40IW_QP_STATE_RTS;
+
+	cqp_info->cqp_cmd = OP_QP_CREATE;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.qp_create.qp = qp;
+	cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP QP create fail\n");
+	return status;
+}
+
+/**
+ * i40iw_cqp_cq_destroy_cmd - destroy a cq via i40iw_cq_wq_destroy()
+ * @dev: device pointer; dev->back_dev is the owning i40iw_device
+ * @cq: pointer to the cq to destroy
+ */
+void i40iw_cqp_cq_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq)
+{
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+	i40iw_cq_wq_destroy(iwdev, cq);
+}
+
+/**
+ * i40iw_cqp_qp_destroy_cmd - issue an OP_QP_DESTROY command through the cqp
+ * @dev: device pointer
+ * @qp: pointer to the qp to destroy
+ */
+void i40iw_cqp_qp_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
+{
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+	struct i40iw_cqp *iwcqp = &iwdev->cqp;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	enum i40iw_status_code status;
+
+	cqp_request = i40iw_get_cqp_request(iwcqp, true);
+	if (!cqp_request)
+		return;	/* void function: nothing is reported to the caller */
+
+	cqp_info = &cqp_request->info;
+	memset(cqp_info, 0, sizeof(*cqp_info));
+
+	cqp_info->cqp_cmd = OP_QP_DESTROY;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.qp_destroy.qp = qp;
+	cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request;
+	cqp_info->in.u.qp_destroy.remove_hash_idx = true;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)	/* failure is only logged */
+		i40iw_pr_err("CQP QP_DESTROY fail\n");
+}
+
+
+/**
  * i40iw_ieq_mpa_crc_ae - generate AE for crc error
  * @dev: hardware control device structure
  * @qp: hardware control qp
@@ -1208,7 +1375,7 @@
 
 	buf->totallen = pkt_len + buf->maclen;
 
-	if (info->payload_len < buf->totallen - 4) {
+	if (info->payload_len < buf->totallen) {
 		i40iw_pr_err("payload_len = 0x%x totallen expected0x%x\n",
 			     info->payload_len, buf->totallen);
 		return I40IW_ERR_INVALID_SIZE;
@@ -1224,27 +1391,29 @@
 
 /**
  * i40iw_hw_stats_timeout - Stats timer-handler which updates all HW stats
- * @dev: hardware control device structure
+ * @vsi: pointer to the vsi structure
  */
-static void i40iw_hw_stats_timeout(unsigned long dev)
+static void i40iw_hw_stats_timeout(unsigned long vsi)
 {
-	struct i40iw_sc_dev *pf_dev = (struct i40iw_sc_dev *)dev;
-	struct i40iw_dev_pestat *pf_devstat = &pf_dev->dev_pestat;
-	struct i40iw_dev_pestat *vf_devstat = NULL;
+	struct i40iw_sc_vsi *sc_vsi =  (struct i40iw_sc_vsi *)vsi;
+	struct i40iw_sc_dev *pf_dev = sc_vsi->dev;
+	struct i40iw_vsi_pestat *pf_devstat = sc_vsi->pestat;
+	struct i40iw_vsi_pestat *vf_devstat = NULL;
 	u16 iw_vf_idx;
 	unsigned long flags;
 
 	/*PF*/
-	pf_devstat->ops.iw_hw_stat_read_all(pf_devstat, &pf_devstat->hw_stats);
+	i40iw_hw_stats_read_all(pf_devstat, &pf_devstat->hw_stats);
+
 	for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; iw_vf_idx++) {
-		spin_lock_irqsave(&pf_devstat->stats_lock, flags);
+		spin_lock_irqsave(&pf_devstat->lock, flags);
 		if (pf_dev->vf_dev[iw_vf_idx]) {
 			if (pf_dev->vf_dev[iw_vf_idx]->stats_initialized) {
-				vf_devstat = &pf_dev->vf_dev[iw_vf_idx]->dev_pestat;
-				vf_devstat->ops.iw_hw_stat_read_all(vf_devstat, &vf_devstat->hw_stats);
+				vf_devstat = &pf_dev->vf_dev[iw_vf_idx]->pestat;
+				i40iw_hw_stats_read_all(vf_devstat, &vf_devstat->hw_stats);
 			}
 		}
-		spin_unlock_irqrestore(&pf_devstat->stats_lock, flags);
+		spin_unlock_irqrestore(&pf_devstat->lock, flags);
 	}
 
 	mod_timer(&pf_devstat->stats_timer,
@@ -1253,26 +1422,26 @@
 
 /**
  * i40iw_hw_stats_start_timer - Start periodic stats timer
- * @dev: hardware control device structure
+ * @vsi: pointer to the vsi structure
  */
-void i40iw_hw_stats_start_timer(struct i40iw_sc_dev *dev)
+void i40iw_hw_stats_start_timer(struct i40iw_sc_vsi *vsi)
 {
-	struct i40iw_dev_pestat *devstat = &dev->dev_pestat;
+	struct i40iw_vsi_pestat *devstat = vsi->pestat;
 
 	init_timer(&devstat->stats_timer);
 	devstat->stats_timer.function = i40iw_hw_stats_timeout;
-	devstat->stats_timer.data = (unsigned long)dev;
+	devstat->stats_timer.data = (unsigned long)vsi;
 	mod_timer(&devstat->stats_timer,
 		  jiffies + msecs_to_jiffies(STATS_TIMER_DELAY));
 }
 
 /**
- * i40iw_hw_stats_del_timer - Delete periodic stats timer
- * @dev: hardware control device structure
+ * i40iw_hw_stats_stop_timer - Delete periodic stats timer
+ * @vsi: pointer to the vsi structure
  */
-void i40iw_hw_stats_del_timer(struct i40iw_sc_dev *dev)
+void i40iw_hw_stats_stop_timer(struct i40iw_sc_vsi *vsi)
 {
-	struct i40iw_dev_pestat *devstat = &dev->dev_pestat;
+	struct i40iw_vsi_pestat *devstat = vsi->pestat;
 
 	del_timer_sync(&devstat->stats_timer);
 }
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 6329c97..7368a50 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -37,6 +37,7 @@
 #include <linux/random.h>
 #include <linux/highmem.h>
 #include <linux/time.h>
+#include <linux/hugetlb.h>
 #include <asm/byteorder.h>
 #include <net/ip.h>
 #include <rdma/ib_verbs.h>
@@ -67,13 +68,13 @@
 	props->vendor_part_id = iwdev->ldev->pcidev->device;
 	props->hw_ver = (u32)iwdev->sc_dev.hw_rev;
 	props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
-	props->max_qp = iwdev->max_qp;
+	props->max_qp = iwdev->max_qp - iwdev->used_qps;
 	props->max_qp_wr = (I40IW_MAX_WQ_ENTRIES >> 2) - 1;
 	props->max_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
-	props->max_cq = iwdev->max_cq;
+	props->max_cq = iwdev->max_cq - iwdev->used_cqs;
 	props->max_cqe = iwdev->max_cqe;
-	props->max_mr = iwdev->max_mr;
-	props->max_pd = iwdev->max_pd;
+	props->max_mr = iwdev->max_mr - iwdev->used_mrs;
+	props->max_pd = iwdev->max_pd - iwdev->used_pds;
 	props->max_sge_rd = I40IW_MAX_SGE_RD;
 	props->max_qp_rd_atom = I40IW_MAX_IRD_SIZE;
 	props->max_qp_init_rd_atom = props->max_qp_rd_atom;
@@ -254,7 +255,6 @@
 {
 	struct i40iw_cqp_request *cqp_request;
 	struct cqp_commands_info *cqp_info;
-	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
 	enum i40iw_status_code status;
 
 	if (qp->push_idx != I40IW_INVALID_PUSH_PAGE_INDEX)
@@ -270,7 +270,7 @@
 	cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE;
 	cqp_info->post_sq = 1;
 
-	cqp_info->in.u.manage_push_page.info.qs_handle = dev->qs_handle;
+	cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle;
 	cqp_info->in.u.manage_push_page.info.free_page = 0;
 	cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp;
 	cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
@@ -292,7 +292,6 @@
 {
 	struct i40iw_cqp_request *cqp_request;
 	struct cqp_commands_info *cqp_info;
-	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
 	enum i40iw_status_code status;
 
 	if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX)
@@ -307,7 +306,7 @@
 	cqp_info->post_sq = 1;
 
 	cqp_info->in.u.manage_push_page.info.push_idx = qp->push_idx;
-	cqp_info->in.u.manage_push_page.info.qs_handle = dev->qs_handle;
+	cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle;
 	cqp_info->in.u.manage_push_page.info.free_page = 1;
 	cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp;
 	cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
@@ -337,6 +336,9 @@
 	u32 pd_id = 0;
 	int err;
 
+	if (iwdev->closing)
+		return ERR_PTR(-ENODEV);
+
 	err = i40iw_alloc_resource(iwdev, iwdev->allocated_pds,
 				   iwdev->max_pd, &pd_id, &iwdev->next_pd);
 	if (err) {
@@ -602,6 +604,9 @@
 	struct i40iwarp_offload_info *iwarp_info;
 	unsigned long flags;
 
+	if (iwdev->closing)
+		return ERR_PTR(-ENODEV);
+
 	if (init_attr->create_flags)
 		return ERR_PTR(-EINVAL);
 	if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE)
@@ -610,11 +615,15 @@
 	if (init_attr->cap.max_send_sge > I40IW_MAX_WQ_FRAGMENT_COUNT)
 		init_attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
 
+	if (init_attr->cap.max_recv_sge > I40IW_MAX_WQ_FRAGMENT_COUNT)
+		init_attr->cap.max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
+
 	memset(&init_info, 0, sizeof(init_info));
 
 	sq_size = init_attr->cap.max_send_wr;
 	rq_size = init_attr->cap.max_recv_wr;
 
+	init_info.vsi = &iwdev->vsi;
 	init_info.qp_uk_init_info.sq_size = sq_size;
 	init_info.qp_uk_init_info.rq_size = rq_size;
 	init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge;
@@ -774,6 +783,7 @@
 	iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
 	iwdev->qp_table[qp_num] = iwqp;
 	i40iw_add_pdusecount(iwqp->iwpd);
+	i40iw_add_devusecount(iwdev);
 	if (ibpd->uobject && udata) {
 		memset(&uresp, 0, sizeof(uresp));
 		uresp.actual_sq_size = sq_size;
@@ -815,8 +825,9 @@
 	attr->qp_access_flags = 0;
 	attr->cap.max_send_wr = qp->qp_uk.sq_size;
 	attr->cap.max_recv_wr = qp->qp_uk.rq_size;
-	attr->cap.max_recv_sge = 1;
 	attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE;
+	attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
+	attr->cap.max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
 	init_attr->event_handler = iwqp->ibqp.event_handler;
 	init_attr->qp_context = iwqp->ibqp.qp_context;
 	init_attr->send_cq = iwqp->ibqp.send_cq;
@@ -884,6 +895,11 @@
 	spin_lock_irqsave(&iwqp->lock, flags);
 
 	if (attr_mask & IB_QP_STATE) {
+		if (iwdev->closing && attr->qp_state != IB_QPS_ERR) {
+			err = -EINVAL;
+			goto exit;
+		}
+
 		switch (attr->qp_state) {
 		case IB_QPS_INIT:
 		case IB_QPS_RTR:
@@ -944,7 +960,7 @@
 				goto exit;
 			}
 			if (iwqp->sc_qp.term_flags)
-				del_timer(&iwqp->terminate_timer);
+				i40iw_terminate_del_timer(&iwqp->sc_qp);
 			info.next_iwarp_state = I40IW_QP_STATE_ERROR;
 			if ((iwqp->hw_tcp_state > I40IW_TCP_STATE_CLOSED) &&
 			    iwdev->iw_status &&
@@ -1037,11 +1053,11 @@
 }
 
 /**
- * cq_wq_destroy - send cq destroy cqp
+ * i40iw_cq_wq_destroy - send cq destroy cqp
  * @iwdev: iwarp device
  * @cq: hardware control cq
  */
-static void cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq)
+void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq)
 {
 	enum i40iw_status_code status;
 	struct i40iw_cqp_request *cqp_request;
@@ -1080,9 +1096,10 @@
 	iwcq = to_iwcq(ib_cq);
 	iwdev = to_iwdev(ib_cq->device);
 	cq = &iwcq->sc_cq;
-	cq_wq_destroy(iwdev, cq);
+	i40iw_cq_wq_destroy(iwdev, cq);
 	cq_free_resources(iwdev, iwcq);
 	kfree(iwcq);
+	i40iw_rem_devusecount(iwdev);
 	return 0;
 }
 
@@ -1113,6 +1130,9 @@
 	int err_code;
 	int entries = attr->cqe;
 
+	if (iwdev->closing)
+		return ERR_PTR(-ENODEV);
+
 	if (entries > iwdev->max_cqe)
 		return ERR_PTR(-EINVAL);
 
@@ -1137,7 +1157,8 @@
 	ukinfo->cq_id = cq_num;
 	iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size;
 	info.ceqe_mask = 0;
-	info.ceq_id = 0;
+	if (attr->comp_vector < iwdev->ceqs_count)
+		info.ceq_id = attr->comp_vector;
 	info.ceq_id_valid = true;
 	info.ceqe_mask = 1;
 	info.type = I40IW_CQ_TYPE_IWARP;
@@ -1229,10 +1250,11 @@
 		}
 	}
 
+	i40iw_add_devusecount(iwdev);
 	return (struct ib_cq *)iwcq;
 
 cq_destroy:
-	cq_wq_destroy(iwdev, cq);
+	i40iw_cq_wq_destroy(iwdev, cq);
 cq_free_resources:
 	cq_free_resources(iwdev, iwcq);
 error:
@@ -1266,6 +1288,7 @@
 
 	stag_idx = (stag & iwdev->mr_stagmask) >> I40IW_CQPSQ_STAG_IDX_SHIFT;
 	i40iw_free_resource(iwdev, iwdev->allocated_mrs, stag_idx);
+	i40iw_rem_devusecount(iwdev);
 }
 
 /**
@@ -1296,19 +1319,18 @@
 		stag = stag_index << I40IW_CQPSQ_STAG_IDX_SHIFT;
 		stag |= driver_key;
 		stag += (u32)consumer_key;
+		i40iw_add_devusecount(iwdev);
 	}
 	return stag;
 }
 
 /**
  * i40iw_next_pbl_addr - Get next pbl address
- * @palloc: Poiner to allocated pbles
  * @pbl: pointer to a pble
  * @pinfo: info pointer
  * @idx: index
  */
-static inline u64 *i40iw_next_pbl_addr(struct i40iw_pble_alloc *palloc,
-				       u64 *pbl,
+static inline u64 *i40iw_next_pbl_addr(u64 *pbl,
 				       struct i40iw_pble_info **pinfo,
 				       u32 *idx)
 {
@@ -1336,9 +1358,11 @@
 	struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
 	struct i40iw_pble_info *pinfo;
 	struct scatterlist *sg;
+	u64 pg_addr = 0;
 	u32 idx = 0;
 
 	pinfo = (level == I40IW_LEVEL_1) ? NULL : palloc->level2.leaf;
+
 	pg_shift = ffs(region->page_size) - 1;
 	for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
 		chunk_pages = sg_dma_len(sg) >> pg_shift;
@@ -1346,17 +1370,96 @@
 		    !iwpbl->qp_mr.sq_page)
 			iwpbl->qp_mr.sq_page = sg_page(sg);
 		for (i = 0; i < chunk_pages; i++) {
-			*pbl = cpu_to_le64(sg_dma_address(sg) + region->page_size * i);
-			pbl = i40iw_next_pbl_addr(palloc, pbl, &pinfo, &idx);
+			pg_addr = sg_dma_address(sg) + region->page_size * i;
+
+			if ((entry + i) == 0)
+				*pbl = cpu_to_le64(pg_addr & iwmr->page_msk);
+			else if (!(pg_addr & ~iwmr->page_msk))
+				*pbl = cpu_to_le64(pg_addr);
+			else
+				continue;
+			pbl = i40iw_next_pbl_addr(pbl, &pinfo, &idx);
 		}
 	}
 }
 
 /**
+ * i40iw_set_hugetlb_values - set MR pg size and mask to huge pg values.
+ * @addr: virtual address
+ * @iwmr: mr pointer for this memory registration
+ */
+static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr)
+{
+	struct vm_area_struct *vma;
+	struct hstate *h;
+
+	vma = find_vma(current->mm, addr);
+	if (vma && is_vm_hugetlb_page(vma)) {
+		h = hstate_vma(vma);
+		if (huge_page_size(h) == 0x200000) {
+			iwmr->page_size = huge_page_size(h);
+			iwmr->page_msk = huge_page_mask(h);
+		}
+	}
+}
+
+/**
+ * i40iw_check_mem_contiguous - check if pbls stored in arr are contiguous
+ * @arr: lvl1 pbl array
+ * @npages: page count
+ * @pg_size: page size
+ *
+ */
+static bool i40iw_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size)
+{
+	u32 pg_idx;
+
+	for (pg_idx = 0; pg_idx < npages; pg_idx++) {
+		if ((*arr + (pg_size * pg_idx)) != arr[pg_idx])
+			return false;
+	}
+	return true;
+}
+
+/**
+ * i40iw_check_mr_contiguous - check if MR is physically contiguous
+ * @palloc: pbl allocation struct
+ * @pg_size: page size
+ */
+static bool i40iw_check_mr_contiguous(struct i40iw_pble_alloc *palloc, u32 pg_size)
+{
+	struct i40iw_pble_level2 *lvl2 = &palloc->level2;
+	struct i40iw_pble_info *leaf = lvl2->leaf;
+	u64 *arr = NULL;
+	u64 *start_addr = NULL;
+	int i;
+	bool ret;
+
+	if (palloc->level == I40IW_LEVEL_1) {
+		arr = (u64 *)palloc->level1.addr;
+		ret = i40iw_check_mem_contiguous(arr, palloc->total_cnt, pg_size);
+		return ret;
+	}
+
+	start_addr = (u64 *)leaf->addr;
+
+	for (i = 0; i < lvl2->leaf_cnt; i++, leaf++) {
+		arr = (u64 *)leaf->addr;
+		if ((*start_addr + (i * pg_size * PBLE_PER_PAGE)) != *arr)
+			return false;
+		ret = i40iw_check_mem_contiguous(arr, leaf->cnt, pg_size);
+		if (!ret)
+			return false;
+	}
+
+	return true;
+}
+
+/**
  * i40iw_setup_pbles - copy user pg address to pble's
  * @iwdev: iwarp device
  * @iwmr: mr pointer for this memory registration
- * @use_pbles: flag if to use pble's or memory (level 0)
+ * @use_pbles: flag if to use pble's
  */
 static int i40iw_setup_pbles(struct i40iw_device *iwdev,
 			     struct i40iw_mr *iwmr,
@@ -1369,9 +1472,6 @@
 	enum i40iw_status_code status;
 	enum i40iw_pble_level level = I40IW_LEVEL_1;
 
-	if (!use_pbles && (iwmr->page_cnt > MAX_SAVE_PAGE_ADDRS))
-		return -ENOMEM;
-
 	if (use_pbles) {
 		mutex_lock(&iwdev->pbl_mutex);
 		status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt);
@@ -1388,6 +1488,10 @@
 	}
 
 	i40iw_copy_user_pgaddrs(iwmr, pbl, level);
+
+	if (use_pbles)
+		iwmr->pgaddrmem[0] = *pbl;
+
 	return 0;
 }
 
@@ -1409,14 +1513,18 @@
 	struct i40iw_cq_mr *cqmr = &iwpbl->cq_mr;
 	struct i40iw_hmc_pble *hmc_p;
 	u64 *arr = iwmr->pgaddrmem;
+	u32 pg_size;
 	int err;
 	int total;
+	bool ret = true;
 
 	total = req->sq_pages + req->rq_pages + req->cq_pages;
+	pg_size = iwmr->page_size;
 
 	err = i40iw_setup_pbles(iwdev, iwmr, use_pbles);
 	if (err)
 		return err;
+
 	if (use_pbles && (palloc->level != I40IW_LEVEL_1)) {
 		i40iw_free_pble(iwdev->pble_rsrc, palloc);
 		iwpbl->pbl_allocated = false;
@@ -1425,26 +1533,44 @@
 
 	if (use_pbles)
 		arr = (u64 *)palloc->level1.addr;
-	if (req->reg_type == IW_MEMREG_TYPE_QP) {
+
+	if (iwmr->type == IW_MEMREG_TYPE_QP) {
 		hmc_p = &qpmr->sq_pbl;
 		qpmr->shadow = (dma_addr_t)arr[total];
+
 		if (use_pbles) {
+			ret = i40iw_check_mem_contiguous(arr, req->sq_pages, pg_size);
+			if (ret)
+				ret = i40iw_check_mem_contiguous(&arr[req->sq_pages], req->rq_pages, pg_size);
+		}
+
+		if (!ret) {
 			hmc_p->idx = palloc->level1.idx;
 			hmc_p = &qpmr->rq_pbl;
 			hmc_p->idx = palloc->level1.idx + req->sq_pages;
 		} else {
 			hmc_p->addr = arr[0];
 			hmc_p = &qpmr->rq_pbl;
-			hmc_p->addr = arr[1];
+			hmc_p->addr = arr[req->sq_pages];
 		}
 	} else {		/* CQ */
 		hmc_p = &cqmr->cq_pbl;
 		cqmr->shadow = (dma_addr_t)arr[total];
+
 		if (use_pbles)
+			ret = i40iw_check_mem_contiguous(arr, req->cq_pages, pg_size);
+
+		if (!ret)
 			hmc_p->idx = palloc->level1.idx;
 		else
 			hmc_p->addr = arr[0];
 	}
+
+	if (use_pbles && ret) {
+		i40iw_free_pble(iwdev->pble_rsrc, palloc);
+		iwpbl->pbl_allocated = false;
+	}
+
 	return err;
 }
 
@@ -1642,8 +1768,9 @@
 	stag_info->access_rights = access;
 	stag_info->pd_id = iwpd->sc_pd.pd_id;
 	stag_info->addr_type = I40IW_ADDR_TYPE_VA_BASED;
+	stag_info->page_size = iwmr->page_size;
 
-	if (iwmr->page_cnt > 1) {
+	if (iwpbl->pbl_allocated) {
 		if (palloc->level == I40IW_LEVEL_1) {
 			stag_info->first_pm_pbl_index = palloc->level1.idx;
 			stag_info->chunk_size = 1;
@@ -1699,6 +1826,11 @@
 	bool use_pbles = false;
 	unsigned long flags;
 	int err = -ENOSYS;
+	int ret;
+	int pg_shift;
+
+	if (iwdev->closing)
+		return ERR_PTR(-ENODEV);
 
 	if (length > I40IW_MAX_MR_SIZE)
 		return ERR_PTR(-EINVAL);
@@ -1723,9 +1855,17 @@
 	iwmr->ibmr.pd = pd;
 	iwmr->ibmr.device = pd->device;
 	ucontext = to_ucontext(pd->uobject->context);
-	region_length = region->length + (start & 0xfff);
-	pbl_depth = region_length >> 12;
-	pbl_depth += (region_length & (4096 - 1)) ? 1 : 0;
+
+	iwmr->page_size = region->page_size;
+	iwmr->page_msk = PAGE_MASK;
+
+	if (region->hugetlb && (req.reg_type == IW_MEMREG_TYPE_MEM))
+		i40iw_set_hugetlb_values(start, iwmr);
+
+	region_length = region->length + (start & (iwmr->page_size - 1));
+	pg_shift = ffs(iwmr->page_size) - 1;
+	pbl_depth = region_length >> pg_shift;
+	pbl_depth += (region_length & (iwmr->page_size - 1)) ? 1 : 0;
 	iwmr->length = region->length;
 
 	iwpbl->user_base = virt;
@@ -1755,13 +1895,21 @@
 		spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
 		break;
 	case IW_MEMREG_TYPE_MEM:
+		use_pbles = (iwmr->page_cnt != 1);
 		access = I40IW_ACCESS_FLAGS_LOCALREAD;
 
-		use_pbles = (iwmr->page_cnt != 1);
 		err = i40iw_setup_pbles(iwdev, iwmr, use_pbles);
 		if (err)
 			goto error;
 
+		if (use_pbles) {
+			ret = i40iw_check_mr_contiguous(palloc, iwmr->page_size);
+			if (ret) {
+				i40iw_free_pble(iwdev->pble_rsrc, palloc);
+				iwpbl->pbl_allocated = false;
+			}
+		}
+
 		access |= i40iw_get_user_access(acc);
 		stag = i40iw_create_stag(iwdev);
 		if (!stag) {
@@ -1778,6 +1926,7 @@
 			i40iw_free_stag(iwdev, stag);
 			goto error;
 		}
+
 		break;
 	default:
 		goto error;
@@ -1789,7 +1938,7 @@
 	return &iwmr->ibmr;
 
 error:
-	if (palloc->level != I40IW_LEVEL_0)
+	if (palloc->level != I40IW_LEVEL_0 && iwpbl->pbl_allocated)
 		i40iw_free_pble(iwdev->pble_rsrc, palloc);
 	ib_umem_release(region);
 	kfree(iwmr);
@@ -2142,7 +2291,6 @@
 		case IB_WR_REG_MR:
 		{
 			struct i40iw_mr *iwmr = to_iwmr(reg_wr(ib_wr)->mr);
-			int page_shift = ilog2(reg_wr(ib_wr)->mr->page_size);
 			int flags = reg_wr(ib_wr)->access;
 			struct i40iw_pble_alloc *palloc = &iwmr->iwpbl.pble_alloc;
 			struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev;
@@ -2153,6 +2301,7 @@
 			info.access_rights |= i40iw_get_user_access(flags);
 			info.stag_key = reg_wr(ib_wr)->key & 0xff;
 			info.stag_idx = reg_wr(ib_wr)->key >> 8;
+			info.page_size = reg_wr(ib_wr)->mr->page_size;
 			info.wr_id = ib_wr->wr_id;
 
 			info.addr_type = I40IW_ADDR_TYPE_VA_BASED;
@@ -2166,9 +2315,6 @@
 			if (iwmr->npages > I40IW_MIN_PAGES_PER_FMR)
 				info.chunk_size = 1;
 
-			if (page_shift == 21)
-				info.page_size = 1; /* 2M page */
-
 			ret = dev->iw_priv_qp_ops->iw_mr_fast_register(&iwqp->sc_qp, &info, true);
 			if (ret)
 				err = -ENOMEM;
@@ -2487,21 +2633,17 @@
 {
 	struct i40iw_device *iwdev = to_iwdev(ibdev);
 	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
-	struct i40iw_dev_pestat *devstat = &dev->dev_pestat;
+	struct i40iw_vsi_pestat *devstat = iwdev->vsi.pestat;
 	struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
-	unsigned long flags;
 
 	if (dev->is_pf) {
-		spin_lock_irqsave(&devstat->stats_lock, flags);
-		devstat->ops.iw_hw_stat_read_all(devstat,
-			&devstat->hw_stats);
-		spin_unlock_irqrestore(&devstat->stats_lock, flags);
+		i40iw_hw_stats_read_all(devstat, &devstat->hw_stats);
 	} else {
 		if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats))
 			return -ENOSYS;
 	}
 
-	memcpy(&stats->value[0], &hw_stats, sizeof(*hw_stats));
+	memcpy(&stats->value[0], hw_stats, sizeof(*hw_stats));
 
 	return stats->num_counters;
 }
@@ -2562,7 +2704,9 @@
  * @ah_attr: address handle attributes
  */
 static struct ib_ah *i40iw_create_ah(struct ib_pd *ibpd,
-				     struct ib_ah_attr *attr)
+				     struct ib_ah_attr *attr,
+				     struct ib_udata *udata)
+
 {
 	return ERR_PTR(-ENOSYS);
 }
@@ -2621,7 +2765,7 @@
 	    (1ull << IB_USER_VERBS_CMD_POST_RECV) |
 	    (1ull << IB_USER_VERBS_CMD_POST_SEND);
 	iwibdev->ibdev.phys_port_cnt = 1;
-	iwibdev->ibdev.num_comp_vectors = 1;
+	iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count;
 	iwibdev->ibdev.dma_device = &pcidev->dev;
 	iwibdev->ibdev.dev.parent = &pcidev->dev;
 	iwibdev->ibdev.query_port = i40iw_query_port;
@@ -2654,7 +2798,6 @@
 	iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL);
 	if (!iwibdev->ibdev.iwcm) {
 		ib_dealloc_device(&iwibdev->ibdev);
-		i40iw_pr_err("iwcm == NULL\n");
 		return NULL;
 	}
 
@@ -2719,6 +2862,9 @@
 	i40iw_unregister_rdma_device(iwibdev);
 	kfree(iwibdev->ibdev.iwcm);
 	iwibdev->ibdev.iwcm = NULL;
+	wait_event_timeout(iwibdev->iwdev->close_wq,
+			   !atomic64_read(&iwibdev->iwdev->use_count),
+			   I40IW_EVENT_TIMEOUT);
 	ib_dealloc_device(&iwibdev->ibdev);
 }
 
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
index 0069be8..6549c93 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
@@ -92,6 +92,8 @@
 	struct ib_umem *region;
 	u16 type;
 	u32 page_cnt;
+	u32 page_size;
+	u64 page_msk;
 	u32 npages;
 	u32 stag;
 	u64 length;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
index 3041003..f4d1368 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
@@ -403,6 +403,19 @@
 }
 
 /**
+ * i40iw_vf_init_pestat - Initialize stats for VF
+ * @dev: pointer to the VF Device
+ * @stats: Statistics structure pointer
+ * @index: Stats index
+ */
+static void i40iw_vf_init_pestat(struct i40iw_sc_dev *dev, struct i40iw_vsi_pestat *stats, u16 index)
+{
+	stats->hw = dev->hw;
+	i40iw_hw_stats_init(stats, (u8)index, false);
+	spin_lock_init(&stats->lock);
+}
+
+/**
  * i40iw_vchnl_recv_pf - Receive PF virtual channel messages
  * @dev: IWARP device pointer
  * @vf_id: Virtual function ID associated with the message
@@ -421,9 +434,8 @@
 	u16 first_avail_iw_vf = I40IW_MAX_PE_ENABLED_VF_COUNT;
 	struct i40iw_virt_mem vf_dev_mem;
 	struct i40iw_virtchnl_work_info work_info;
-	struct i40iw_dev_pestat *devstat;
+	struct i40iw_vsi_pestat *stats;
 	enum i40iw_status_code ret_code;
-	unsigned long flags;
 
 	if (!dev || !msg || !len)
 		return I40IW_ERR_PARAM;
@@ -496,14 +508,7 @@
 				i40iw_debug(dev, I40IW_DEBUG_VIRT,
 					    "VF%u error CQP HMC Function operation.\n",
 					    vf_id);
-			ret_code = i40iw_device_init_pestat(&vf_dev->dev_pestat);
-			if (ret_code)
-				i40iw_debug(dev, I40IW_DEBUG_VIRT,
-					    "VF%u - i40iw_device_init_pestat failed\n",
-					    vf_id);
-			vf_dev->dev_pestat.ops.iw_hw_stat_init(&vf_dev->dev_pestat,
-							      (u8)vf_dev->pmf_index,
-							      dev->hw, false);
+			i40iw_vf_init_pestat(dev, &vf_dev->pestat, vf_dev->pmf_index);
 			vf_dev->stats_initialized = true;
 		} else {
 			if (vf_dev) {
@@ -534,12 +539,10 @@
 	case I40IW_VCHNL_OP_GET_STATS:
 		if (!vf_dev)
 			return I40IW_ERR_BAD_PTR;
-		devstat = &vf_dev->dev_pestat;
-		spin_lock_irqsave(&dev->dev_pestat.stats_lock, flags);
-		devstat->ops.iw_hw_stat_read_all(devstat, &devstat->hw_stats);
-		spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags);
+		stats = &vf_dev->pestat;
+		i40iw_hw_stats_read_all(stats, &stats->hw_stats);
 		vf_dev->msg_count--;
-		vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, &devstat->hw_stats);
+		vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, &stats->hw_stats);
 		break;
 	default:
 		i40iw_debug(dev, I40IW_DEBUG_VIRT,
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 5fc6233..20c6d17 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -111,7 +111,9 @@
 		       !(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support))
 			--ah->av.eth.stat_rate;
 	}
-
+	ah->av.eth.sl_tclass_flowlabel |=
+			cpu_to_be32((ah_attr->grh.traffic_class << 20) |
+				    ah_attr->grh.flow_label);
 	/*
 	 * HW requires multicast LID so we just choose one.
 	 */
@@ -119,12 +121,14 @@
 		ah->av.ib.dlid = cpu_to_be16(0xc000);
 
 	memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16);
-	ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 29);
+	ah->av.eth.sl_tclass_flowlabel |= cpu_to_be32(ah_attr->sl << 29);
 
 	return &ah->ibah;
 }
 
-struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+				struct ib_udata *udata)
+
 {
 	struct mlx4_ib_ah *ah;
 	struct ib_ah *ret;
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index 5e99390..06020c5 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -755,10 +755,8 @@
 	struct mlx4_ib_dev *dev = container_of(ib_sriov, struct mlx4_ib_dev, sriov);
 
 	rec = kzalloc(sizeof *rec, GFP_KERNEL);
-	if (!rec) {
-		pr_err("alias_guid_work: No Memory\n");
+	if (!rec)
 		return;
-	}
 
 	pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1);
 	ret = get_next_record_to_update(dev, sriov_alias_port->port, rec);
diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
index 39a4888..d648453 100644
--- a/drivers/infiniband/hw/mlx4/cm.c
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -247,10 +247,8 @@
 	struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
 
 	ent = kmalloc(sizeof (struct id_map_entry), GFP_KERNEL);
-	if (!ent) {
-		mlx4_ib_warn(ibdev, "Couldn't allocate id cache entry - out of memory\n");
+	if (!ent)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	ent->sl_cm_id = sl_cm_id;
 	ent->slave_id = slave_id;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 1672907..db564cc 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -39,6 +39,8 @@
 #include <linux/mlx4/cmd.h>
 #include <linux/gfp.h>
 #include <rdma/ib_pma.h>
+#include <linux/ip.h>
+#include <net/ipv6.h>
 
 #include <linux/mlx4/driver.h>
 #include "mlx4_ib.h"
@@ -480,6 +482,23 @@
 	return -EINVAL;
 }
 
+static int get_gids_from_l3_hdr(struct ib_grh *grh, union ib_gid *sgid,
+				union ib_gid *dgid)
+{
+	int version = ib_get_rdma_header_version((const union rdma_network_hdr *)grh);
+	enum rdma_network_type net_type;
+
+	if (version == 4)
+		net_type = RDMA_NETWORK_IPV4;
+	else if (version == 6)
+		net_type = RDMA_NETWORK_IPV6;
+	else
+		return -EINVAL;
+
+	return ib_get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
+					 sgid, dgid);
+}
+
 int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
 			  enum ib_qp_type dest_qpt, struct ib_wc *wc,
 			  struct ib_grh *grh, struct ib_mad *mad)
@@ -538,7 +557,10 @@
 	memset(&attr, 0, sizeof attr);
 	attr.port_num = port;
 	if (is_eth) {
-		memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16);
+		union ib_gid sgid;
+
+		if (get_gids_from_l3_hdr(grh, &sgid, &attr.grh.dgid))
+			return -EINVAL;
 		attr.ah_flags = IB_AH_GRH;
 	}
 	ah = ib_create_ah(tun_ctx->pd, &attr);
@@ -651,6 +673,11 @@
 		is_eth = 1;
 
 	if (is_eth) {
+		union ib_gid dgid;
+		union ib_gid sgid;
+
+		if (get_gids_from_l3_hdr(grh, &sgid, &dgid))
+			return -EINVAL;
 		if (!(wc->wc_flags & IB_WC_GRH)) {
 			mlx4_ib_warn(ibdev, "RoCE grh not present.\n");
 			return -EINVAL;
@@ -659,10 +686,10 @@
 			mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n");
 			return -EINVAL;
 		}
-		err = mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave);
+		err = mlx4_get_slave_from_roce_gid(dev->dev, port, dgid.raw, &slave);
 		if (err && mlx4_is_mf_bonded(dev->dev)) {
 			other_port = (port == 1) ? 2 : 1;
-			err = mlx4_get_slave_from_roce_gid(dev->dev, other_port, grh->dgid.raw, &slave);
+			err = mlx4_get_slave_from_roce_gid(dev->dev, other_port, dgid.raw, &slave);
 			if (!err) {
 				port = other_port;
 				pr_debug("resolved slave %d from gid %pI6 wire port %d other %d\n",
@@ -702,10 +729,18 @@
 
 	/* If a grh is present, we demux according to it */
 	if (wc->wc_flags & IB_WC_GRH) {
-		slave = mlx4_ib_find_real_gid(ibdev, port, grh->dgid.global.interface_id);
-		if (slave < 0) {
-			mlx4_ib_warn(ibdev, "failed matching grh\n");
-			return -ENOENT;
+		if (grh->dgid.global.interface_id ==
+			cpu_to_be64(IB_SA_WELL_KNOWN_GUID) &&
+		    grh->dgid.global.subnet_prefix == cpu_to_be64(
+			atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix))) {
+			slave = 0;
+		} else {
+			slave = mlx4_ib_find_real_gid(ibdev, port,
+						      grh->dgid.global.interface_id);
+			if (slave < 0) {
+				mlx4_ib_warn(ibdev, "failed matching grh\n");
+				return -ENOENT;
+			}
 		}
 	}
 	/* Class-specific handling */
@@ -1102,10 +1137,8 @@
 
 	in_mad  = kmalloc(sizeof *in_mad, GFP_KERNEL);
 	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
-	if (!in_mad || !out_mad) {
-		mlx4_ib_warn(&dev->ib_dev, "failed to allocate memory for guid info mads\n");
+	if (!in_mad || !out_mad)
 		goto out;
-	}
 
 	guid_tbl_blk_num  *= 4;
 
@@ -1916,11 +1949,8 @@
 
 	*ret_ctx = NULL;
 	ctx = kzalloc(sizeof (struct mlx4_ib_demux_pv_ctx), GFP_KERNEL);
-	if (!ctx) {
-		pr_err("failed allocating pv resource context "
-		       "for port %d, slave %d\n", port, slave);
+	if (!ctx)
 		return -ENOMEM;
-	}
 
 	ctx->ib_dev = &dev->ib_dev;
 	ctx->port = port;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index a87c395..c8413fc 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -548,6 +548,7 @@
 	props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
 	props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL;
 	props->timestamp_mask = 0xFFFFFFFFFFFFULL;
+	props->max_ah = INT_MAX;
 
 	if (!mlx4_is_slave(dev->dev))
 		err = mlx4_get_internal_clock_params(dev->dev, &clock_params);
@@ -698,9 +699,11 @@
 	if (err)
 		goto out;
 
-	props->active_width	=  (((u8 *)mailbox->buf)[5] == 0x40) ?
-						IB_WIDTH_4X : IB_WIDTH_1X;
-	props->active_speed	= IB_SPEED_QDR;
+	props->active_width	=  (((u8 *)mailbox->buf)[5] == 0x40) ||
+				   (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
+					   IB_WIDTH_4X : IB_WIDTH_1X;
+	props->active_speed	=  (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
+					   IB_SPEED_FDR : IB_SPEED_QDR;
 	props->port_cap_flags	= IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
 	props->gid_tbl_len	= mdev->dev->caps.gid_table_len[port];
 	props->max_msg_sz	= mdev->dev->caps.max_msg_sz;
@@ -2815,20 +2818,22 @@
 			kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) *
 				sizeof(long),
 				GFP_KERNEL);
-		if (!ibdev->ib_uc_qpns_bitmap) {
-			dev_err(&dev->persist->pdev->dev,
-				"bit map alloc failed\n");
+		if (!ibdev->ib_uc_qpns_bitmap)
 			goto err_steer_qp_release;
+
+		if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB) {
+			bitmap_zero(ibdev->ib_uc_qpns_bitmap,
+				    ibdev->steer_qpn_count);
+			err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
+					dev, ibdev->steer_qpn_base,
+					ibdev->steer_qpn_base +
+					ibdev->steer_qpn_count - 1);
+			if (err)
+				goto err_steer_free_bitmap;
+		} else {
+			bitmap_fill(ibdev->ib_uc_qpns_bitmap,
+				    ibdev->steer_qpn_count);
 		}
-
-		bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count);
-
-		err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
-				dev, ibdev->steer_qpn_base,
-				ibdev->steer_qpn_base +
-				ibdev->steer_qpn_count - 1);
-		if (err)
-			goto err_steer_free_bitmap;
 	}
 
 	for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
@@ -3056,15 +3061,12 @@
 	first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports);
 
 	dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC);
-	if (!dm) {
-		pr_err("failed to allocate memory for tunneling qp update\n");
+	if (!dm)
 		return;
-	}
 
 	for (i = 0; i < ports; i++) {
 		dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
 		if (!dm[i]) {
-			pr_err("failed to allocate memory for tunneling qp update work struct\n");
 			while (--i >= 0)
 				kfree(dm[i]);
 			goto out;
@@ -3224,8 +3226,6 @@
 		ew->port = port;
 		ew->ib_dev = ibdev;
 		queue_work(wq, &ew->work);
-	} else {
-		pr_err("failed to allocate memory for sl2vl update work\n");
 	}
 }
 
@@ -3285,10 +3285,8 @@
 
 	case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
 		ew = kmalloc(sizeof *ew, GFP_ATOMIC);
-		if (!ew) {
-			pr_err("failed to allocate memory for events work\n");
+		if (!ew)
 			break;
-		}
 
 		INIT_WORK(&ew->work, handle_port_mgmt_change_event);
 		memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index a21d37f..e010fe4 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -1142,7 +1142,6 @@
 	work = kmalloc(sizeof *work, GFP_KERNEL);
 	if (!work) {
 		ctx->flushing = 0;
-		mcg_warn("failed allocating work for cleanup\n");
 		return;
 	}
 
@@ -1202,10 +1201,8 @@
 		return 0;
 
 	req = kzalloc(sizeof *req, GFP_KERNEL);
-	if (!req) {
-		mcg_warn_group(group, "failed allocation - may leave stall groups\n");
+	if (!req)
 		return -ENOMEM;
-	}
 
 	if (!list_empty(&group->func[slave].pending)) {
 		pend_req = list_entry(group->func[slave].pending.prev, struct mcast_req, group_list);
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 35141f4..7f3d976 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -742,7 +742,8 @@
 void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
 void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
 
-struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+				struct ib_udata *udata);
 int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
 int mlx4_ib_destroy_ah(struct ib_ah *ah);
 
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 570bc86..c068add 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -644,7 +644,7 @@
 	int qpn;
 	int err;
 	struct ib_qp_cap backup_cap;
-	struct mlx4_ib_sqp *sqp;
+	struct mlx4_ib_sqp *sqp = NULL;
 	struct mlx4_ib_qp *qp;
 	enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
 	struct mlx4_ib_cq *mcq;
@@ -933,7 +933,9 @@
 		mlx4_db_free(dev->dev, &qp->db);
 
 err:
-	if (!*caller_qp)
+	if (sqp)
+		kfree(sqp);
+	else if (!*caller_qp)
 		kfree(qp);
 	return err;
 }
@@ -1280,7 +1282,8 @@
 	if (is_qp0(dev, mqp))
 		mlx4_CLOSE_PORT(dev->dev, mqp->port);
 
-	if (dev->qp1_proxy[mqp->port - 1] == mqp) {
+	if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI &&
+	    dev->qp1_proxy[mqp->port - 1] == mqp) {
 		mutex_lock(&dev->qp1_proxy_lock[mqp->port - 1]);
 		dev->qp1_proxy[mqp->port - 1] = NULL;
 		mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]);
@@ -1764,14 +1767,14 @@
 		u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 :
 			attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
 		union ib_gid gid;
-		struct ib_gid_attr gid_attr;
+		struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB};
 		u16 vlan = 0xffff;
 		u8 smac[ETH_ALEN];
 		int status = 0;
 		int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
 			attr->ah_attr.ah_flags & IB_AH_GRH;
 
-		if (is_eth) {
+		if (is_eth && attr->ah_attr.ah_flags & IB_AH_GRH) {
 			int index = attr->ah_attr.grh.sgid_index;
 
 			status = ib_get_cached_gid(ibqp->device, port_num,
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index 745efa4..d090e96 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -64,7 +64,9 @@
 	return &ah->ibah;
 }
 
-struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+				struct ib_udata *udata)
+
 {
 	struct mlx5_ib_ah *ah;
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
@@ -75,6 +77,27 @@
 	if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH))
 		return ERR_PTR(-EINVAL);
 
+	if (ll == IB_LINK_LAYER_ETHERNET && udata) {
+		int err;
+		struct mlx5_ib_create_ah_resp resp = {};
+		u32 min_resp_len = offsetof(typeof(resp), dmac) +
+				   sizeof(resp.dmac);
+
+		if (udata->outlen < min_resp_len)
+			return ERR_PTR(-EINVAL);
+
+		resp.response_length = min_resp_len;
+
+		err = ib_resolve_eth_dmac(pd->device, ah_attr);
+		if (err)
+			return ERR_PTR(err);
+
+		memcpy(resp.dmac, ah_attr->dmac, ETH_ALEN);
+		err = ib_copy_to_udata(udata, &resp, resp.response_length);
+		if (err)
+			return ERR_PTR(err);
+	}
+
 	ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
 	if (!ah)
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 79d017b..d72a436 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -731,7 +731,7 @@
 			  int entries, u32 **cqb,
 			  int *cqe_size, int *index, int *inlen)
 {
-	struct mlx5_ib_create_cq ucmd;
+	struct mlx5_ib_create_cq ucmd = {};
 	size_t ucmdlen;
 	int page_shift;
 	__be64 *pas;
@@ -770,7 +770,7 @@
 	if (err)
 		goto err_umem;
 
-	mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift,
+	mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, &page_shift,
 			   &ncont, NULL);
 	mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
 		    ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);
@@ -792,8 +792,36 @@
 
 	*index = to_mucontext(context)->uuari.uars[0].index;
 
+	if (ucmd.cqe_comp_en == 1) {
+		if (unlikely((*cqe_size != 64) ||
+			     !MLX5_CAP_GEN(dev->mdev, cqe_compression))) {
+			err = -EOPNOTSUPP;
+			mlx5_ib_warn(dev, "CQE compression is not supported for size %d!\n",
+				     *cqe_size);
+			goto err_cqb;
+		}
+
+		if (unlikely(!ucmd.cqe_comp_res_format ||
+			     !(ucmd.cqe_comp_res_format <
+			       MLX5_IB_CQE_RES_RESERVED) ||
+			     (ucmd.cqe_comp_res_format &
+			      (ucmd.cqe_comp_res_format - 1)))) {
+			err = -EOPNOTSUPP;
+			mlx5_ib_warn(dev, "CQE compression res format %d is not supported!\n",
+				     ucmd.cqe_comp_res_format);
+			goto err_cqb;
+		}
+
+		MLX5_SET(cqc, cqc, cqe_comp_en, 1);
+		MLX5_SET(cqc, cqc, mini_cqe_res_format,
+			 ilog2(ucmd.cqe_comp_res_format));
+	}
+
 	return 0;
 
+err_cqb:
+	kfree(cqb);
+
 err_db:
 	mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
 
@@ -1125,7 +1153,7 @@
 		return err;
 	}
 
-	mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift,
+	mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift,
 			   npas, NULL);
 
 	cq->resize_umem = umem;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 8e0dbd5..b81736d 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -127,7 +127,7 @@
 
 		if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev))
 		    && ibdev->ib_active) {
-			struct ib_event ibev = {0};
+			struct ib_event ibev = { };
 
 			ibev.device = &ibdev->ib_dev;
 			ibev.event = (event == NETDEV_UP) ?
@@ -496,6 +496,7 @@
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 	struct mlx5_core_dev *mdev = dev->mdev;
 	int err = -ENOMEM;
+	int max_sq_desc;
 	int max_rq_sg;
 	int max_sq_sg;
 	u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);
@@ -618,9 +619,10 @@
 	props->max_qp_wr	   = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
 	max_rq_sg =  MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
 		     sizeof(struct mlx5_wqe_data_seg);
-	max_sq_sg = (MLX5_CAP_GEN(mdev, max_wqe_sz_sq) -
-		     sizeof(struct mlx5_wqe_ctrl_seg)) /
-		     sizeof(struct mlx5_wqe_data_seg);
+	max_sq_desc = min_t(int, MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
+	max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) -
+		     sizeof(struct mlx5_wqe_raddr_seg)) /
+		sizeof(struct mlx5_wqe_data_seg);
 	props->max_sge = min(max_rq_sg, max_sq_sg);
 	props->max_sge_rd	   = MLX5_MAX_SGE_RD;
 	props->max_cq		   = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
@@ -643,6 +645,7 @@
 	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
 					   props->max_mcast_grp;
 	props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
+	props->max_ah = INT_MAX;
 	props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
 	props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
 
@@ -669,6 +672,40 @@
 			1 << MLX5_CAP_GEN(dev->mdev, log_max_rq);
 	}
 
+	if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
+			uhw->outlen)) {
+		resp.mlx5_ib_support_multi_pkt_send_wqes =
+			MLX5_CAP_ETH(mdev, multi_pkt_send_wqe);
+		resp.response_length +=
+			sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
+	}
+
+	if (field_avail(typeof(resp), reserved, uhw->outlen))
+		resp.response_length += sizeof(resp.reserved);
+
+	if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
+		resp.cqe_comp_caps.max_num =
+			MLX5_CAP_GEN(dev->mdev, cqe_compression) ?
+			MLX5_CAP_GEN(dev->mdev, cqe_compression_max_num) : 0;
+		resp.cqe_comp_caps.supported_format =
+			MLX5_IB_CQE_RES_FORMAT_HASH |
+			MLX5_IB_CQE_RES_FORMAT_CSUM;
+		resp.response_length += sizeof(resp.cqe_comp_caps);
+	}
+
+	if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen)) {
+		if (MLX5_CAP_QOS(mdev, packet_pacing) &&
+		    MLX5_CAP_GEN(mdev, qos)) {
+			resp.packet_pacing_caps.qp_rate_limit_max =
+				MLX5_CAP_QOS(mdev, packet_pacing_max_rate);
+			resp.packet_pacing_caps.qp_rate_limit_min =
+				MLX5_CAP_QOS(mdev, packet_pacing_min_rate);
+			resp.packet_pacing_caps.supported_qpts |=
+				1 << IB_QPT_RAW_PACKET;
+		}
+		resp.response_length += sizeof(resp.packet_pacing_caps);
+	}
+
 	if (uhw->outlen) {
 		err = ib_copy_to_udata(uhw, &resp, resp.response_length);
 
@@ -1093,7 +1130,8 @@
 		resp.response_length += sizeof(resp.cqe_version);
 
 	if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) {
-		resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE;
+		resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
+				      MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
 		resp.response_length += sizeof(resp.cmds_supp_uhw);
 	}
 
@@ -1502,6 +1540,22 @@
 	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
 }
 
+/* Set IPv6 flow label match; u32 since ntohl() values don't fit in a u8. */
+static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
+			   bool inner)
+{
+	if (inner) {
+		MLX5_SET(fte_match_set_misc,
+			 misc_c, inner_ipv6_flow_label, mask);
+		MLX5_SET(fte_match_set_misc,
+			 misc_v, inner_ipv6_flow_label, val);
+		return;
+	}
+
+	MLX5_SET(fte_match_set_misc, misc_c, outer_ipv6_flow_label, mask);
+	MLX5_SET(fte_match_set_misc, misc_v, outer_ipv6_flow_label, val);
+}
+
 static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
 {
 	MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
@@ -1515,6 +1569,7 @@
 #define LAST_IPV4_FIELD tos
 #define LAST_IPV6_FIELD traffic_class
 #define LAST_TCP_UDP_FIELD src_port
+#define LAST_TUNNEL_FIELD tunnel_id
 
 /* Field is the last supported field */
 #define FIELDS_NOT_SUPPORTED(filter, field)\
@@ -1527,155 +1582,164 @@
 static int parse_flow_attr(u32 *match_c, u32 *match_v,
 			   const union ib_flow_spec *ib_spec)
 {
-	void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
-					     outer_headers);
-	void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
-					     outer_headers);
 	void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
 					   misc_parameters);
 	void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
 					   misc_parameters);
+	void *headers_c;
+	void *headers_v;
 
-	switch (ib_spec->type) {
+	if (ib_spec->type & IB_FLOW_SPEC_INNER) {
+		headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+					 inner_headers);
+		headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+					 inner_headers);
+	} else {
+		headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+					 outer_headers);
+		headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+					 outer_headers);
+	}
+
+	switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
 	case IB_FLOW_SPEC_ETH:
 		if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
 			return -ENOTSUPP;
 
-		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 					     dmac_47_16),
 				ib_spec->eth.mask.dst_mac);
-		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 					     dmac_47_16),
 				ib_spec->eth.val.dst_mac);
 
-		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 					     smac_47_16),
 				ib_spec->eth.mask.src_mac);
-		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 					     smac_47_16),
 				ib_spec->eth.val.src_mac);
 
 		if (ib_spec->eth.mask.vlan_tag) {
-			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 				 vlan_tag, 1);
-			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 				 vlan_tag, 1);
 
-			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 				 first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
-			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 				 first_vid, ntohs(ib_spec->eth.val.vlan_tag));
 
-			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 				 first_cfi,
 				 ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
-			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 				 first_cfi,
 				 ntohs(ib_spec->eth.val.vlan_tag) >> 12);
 
-			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 				 first_prio,
 				 ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
-			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 				 first_prio,
 				 ntohs(ib_spec->eth.val.vlan_tag) >> 13);
 		}
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 			 ethertype, ntohs(ib_spec->eth.mask.ether_type));
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 			 ethertype, ntohs(ib_spec->eth.val.ether_type));
 		break;
 	case IB_FLOW_SPEC_IPV4:
 		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
 			return -ENOTSUPP;
 
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 			 ethertype, 0xffff);
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 			 ethertype, ETH_P_IP);
 
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
 		       &ib_spec->ipv4.mask.src_ip,
 		       sizeof(ib_spec->ipv4.mask.src_ip));
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
 		       &ib_spec->ipv4.val.src_ip,
 		       sizeof(ib_spec->ipv4.val.src_ip));
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
 		       &ib_spec->ipv4.mask.dst_ip,
 		       sizeof(ib_spec->ipv4.mask.dst_ip));
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
 		       &ib_spec->ipv4.val.dst_ip,
 		       sizeof(ib_spec->ipv4.val.dst_ip));
 
-		set_tos(outer_headers_c, outer_headers_v,
+		set_tos(headers_c, headers_v,
 			ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
 
-		set_proto(outer_headers_c, outer_headers_v,
+		set_proto(headers_c, headers_v,
 			  ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto);
 		break;
 	case IB_FLOW_SPEC_IPV6:
 		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
 			return -ENOTSUPP;
 
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 			 ethertype, 0xffff);
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 			 ethertype, ETH_P_IPV6);
 
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
 		       &ib_spec->ipv6.mask.src_ip,
 		       sizeof(ib_spec->ipv6.mask.src_ip));
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
 		       &ib_spec->ipv6.val.src_ip,
 		       sizeof(ib_spec->ipv6.val.src_ip));
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
 		       &ib_spec->ipv6.mask.dst_ip,
 		       sizeof(ib_spec->ipv6.mask.dst_ip));
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
 		       &ib_spec->ipv6.val.dst_ip,
 		       sizeof(ib_spec->ipv6.val.dst_ip));
 
-		set_tos(outer_headers_c, outer_headers_v,
+		set_tos(headers_c, headers_v,
 			ib_spec->ipv6.mask.traffic_class,
 			ib_spec->ipv6.val.traffic_class);
 
-		set_proto(outer_headers_c, outer_headers_v,
+		set_proto(headers_c, headers_v,
 			  ib_spec->ipv6.mask.next_hdr,
 			  ib_spec->ipv6.val.next_hdr);
 
-		MLX5_SET(fte_match_set_misc, misc_params_c,
-			 outer_ipv6_flow_label,
-			 ntohl(ib_spec->ipv6.mask.flow_label));
-		MLX5_SET(fte_match_set_misc, misc_params_v,
-			 outer_ipv6_flow_label,
-			 ntohl(ib_spec->ipv6.val.flow_label));
+		set_flow_label(misc_params_c, misc_params_v,
+			       ntohl(ib_spec->ipv6.mask.flow_label),
+			       ntohl(ib_spec->ipv6.val.flow_label),
+			       ib_spec->type & IB_FLOW_SPEC_INNER);
+
 		break;
 	case IB_FLOW_SPEC_TCP:
 		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
 					 LAST_TCP_UDP_FIELD))
 			return -ENOTSUPP;
 
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
 			 0xff);
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
 			 IPPROTO_TCP);
 
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
 			 ntohs(ib_spec->tcp_udp.mask.src_port));
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
 			 ntohs(ib_spec->tcp_udp.val.src_port));
 
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
 			 ntohs(ib_spec->tcp_udp.mask.dst_port));
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
 			 ntohs(ib_spec->tcp_udp.val.dst_port));
 		break;
 	case IB_FLOW_SPEC_UDP:
@@ -1683,21 +1747,31 @@
 					 LAST_TCP_UDP_FIELD))
 			return -ENOTSUPP;
 
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
 			 0xff);
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
 			 IPPROTO_UDP);
 
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
 			 ntohs(ib_spec->tcp_udp.mask.src_port));
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
 			 ntohs(ib_spec->tcp_udp.val.src_port));
 
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
 			 ntohs(ib_spec->tcp_udp.mask.dst_port));
-		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport,
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
 			 ntohs(ib_spec->tcp_udp.val.dst_port));
 		break;
+	case IB_FLOW_SPEC_VXLAN_TUNNEL:
+		if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
+					 LAST_TUNNEL_FIELD))
+			return -ENOTSUPP;
+
+		MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
+			 ntohl(ib_spec->tunnel.mask.tunnel_id));
+		MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
+			 ntohl(ib_spec->tunnel.val.tunnel_id));
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -2718,6 +2792,8 @@
 			       struct ib_port_immutable *immutable)
 {
 	struct ib_port_attr attr;
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
 	int err;
 
 	err = mlx5_ib_query_port(ibdev, port_num, &attr);
@@ -2727,7 +2803,8 @@
 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
 	immutable->gid_tbl_len = attr.gid_tbl_len;
 	immutable->core_cap_flags = get_core_cap_flags(ibdev);
-	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+	if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
+		immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
 	return 0;
 }
@@ -2741,7 +2818,7 @@
 		       fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
 }
 
-static int mlx5_roce_lag_init(struct mlx5_ib_dev *dev)
+static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_core_dev *mdev = dev->mdev;
 	struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(mdev,
@@ -2770,7 +2847,7 @@
 	return err;
 }
 
-static void mlx5_roce_lag_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_core_dev *mdev = dev->mdev;
 
@@ -2782,15 +2859,7 @@
 	}
 }
 
-static void mlx5_remove_roce_notifier(struct mlx5_ib_dev *dev)
-{
-	if (dev->roce.nb.notifier_call) {
-		unregister_netdevice_notifier(&dev->roce.nb);
-		dev->roce.nb.notifier_call = NULL;
-	}
-}
-
-static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
+static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev)
 {
 	int err;
 
@@ -2801,28 +2870,51 @@
 		return err;
 	}
 
-	err = mlx5_nic_vport_enable_roce(dev->mdev);
-	if (err)
-		goto err_unregister_netdevice_notifier;
+	return 0;
+}
 
-	err = mlx5_roce_lag_init(dev);
+static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev)
+{
+	if (!dev->roce.nb.notifier_call)
+		return;	/* callback unset, e.g. already removed */
+	unregister_netdevice_notifier(&dev->roce.nb);
+	dev->roce.nb.notifier_call = NULL;
+}
+
+static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
+{
+	int err;
+
+	err = mlx5_add_netdev_notifier(dev); /* unwound at the last label */
+	if (err)
+		return err;
+
+	if (MLX5_CAP_GEN(dev->mdev, roce)) { /* only if RoCE-capable */
+		err = mlx5_nic_vport_enable_roce(dev->mdev);
+		if (err)
+			goto err_unregister_netdevice_notifier;
+	}
+
+	err = mlx5_eth_lag_init(dev);
 	if (err)
 		goto err_disable_roce;
 
 	return 0;
 
 err_disable_roce:
-	mlx5_nic_vport_disable_roce(dev->mdev);
+	if (MLX5_CAP_GEN(dev->mdev, roce)) /* undo only if it was enabled */
+		mlx5_nic_vport_disable_roce(dev->mdev);
 
 err_unregister_netdevice_notifier:
-	mlx5_remove_roce_notifier(dev);
+	mlx5_remove_netdev_notifier(dev);
 	return err;
 }
 
-static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
+static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
 {
-	mlx5_roce_lag_cleanup(dev);
-	mlx5_nic_vport_disable_roce(dev->mdev);
+	mlx5_eth_lag_cleanup(dev);
+	if (MLX5_CAP_GEN(dev->mdev, roce)) /* same guard as the enable path */
+		mlx5_nic_vport_disable_roce(dev->mdev);
 }
 
 static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
@@ -2944,9 +3036,6 @@
 	port_type_cap = MLX5_CAP_GEN(mdev, port_type);
 	ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
 
-	if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce))
-		return NULL;
-
 	printk_once(KERN_INFO "%s", mlx5_version);
 
 	dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
@@ -2992,6 +3081,8 @@
 		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
 		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
 		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
+		(1ull << IB_USER_VERBS_CMD_CREATE_AH)		|
+		(1ull << IB_USER_VERBS_CMD_DESTROY_AH)		|
 		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
 		(1ull << IB_USER_VERBS_CMD_REREG_MR)		|
 		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
@@ -3014,7 +3105,8 @@
 	dev->ib_dev.uverbs_ex_cmd_mask =
 		(1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE)	|
 		(1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ)	|
-		(1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
+		(1ull << IB_USER_VERBS_EX_CMD_CREATE_QP)	|
+		(1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP);
 
 	dev->ib_dev.query_device	= mlx5_ib_query_device;
 	dev->ib_dev.query_port		= mlx5_ib_query_port;
@@ -3125,14 +3217,14 @@
 	spin_lock_init(&dev->reset_flow_resource_lock);
 
 	if (ll == IB_LINK_LAYER_ETHERNET) {
-		err = mlx5_enable_roce(dev);
+		err = mlx5_enable_eth(dev);
 		if (err)
 			goto err_dealloc;
 	}
 
 	err = create_dev_resources(&dev->devr);
 	if (err)
-		goto err_disable_roce;
+		goto err_disable_eth;
 
 	err = mlx5_ib_odp_init_one(dev);
 	if (err)
@@ -3176,10 +3268,10 @@
 err_rsrc:
 	destroy_dev_resources(&dev->devr);
 
-err_disable_roce:
+err_disable_eth:
 	if (ll == IB_LINK_LAYER_ETHERNET) {
-		mlx5_disable_roce(dev);
-		mlx5_remove_roce_notifier(dev);
+		mlx5_disable_eth(dev);
+		mlx5_remove_netdev_notifier(dev);
 	}
 
 err_free_port:
@@ -3196,14 +3288,14 @@
 	struct mlx5_ib_dev *dev = context;
 	enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
 
-	mlx5_remove_roce_notifier(dev);
+	mlx5_remove_netdev_notifier(dev);
 	ib_unregister_device(&dev->ib_dev);
 	mlx5_ib_dealloc_q_counters(dev);
 	destroy_umrc_res(dev);
 	mlx5_ib_odp_remove_one(dev);
 	destroy_dev_resources(&dev->devr);
 	if (ll == IB_LINK_LAYER_ETHERNET)
-		mlx5_disable_roce(dev);
+		mlx5_disable_eth(dev);
 	kfree(dev->port);
 	ib_dealloc_device(&dev->ib_dev);
 }
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 996b54e..6851357 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -37,12 +37,15 @@
 
 /* @umem: umem object to scan
  * @addr: ib virtual address requested by the user
+ * @max_page_shift: high limit for page_shift - 0 means no limit
  * @count: number of PAGE_SIZE pages covered by umem
  * @shift: page shift for the compound pages found in the region
- * @ncont: number of compund pages
+ * @ncont: number of compound pages
  * @order: log2 of the number of compound pages
  */
-void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
+void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
+			unsigned long max_page_shift,
+			int *count, int *shift,
 			int *ncont, int *order)
 {
 	unsigned long tmp;
@@ -72,6 +75,8 @@
 	addr = addr >> page_shift;
 	tmp = (unsigned long)addr;
 	m = find_first_bit(&tmp, BITS_PER_LONG);
+	if (max_page_shift)
+		m = min_t(unsigned long, max_page_shift - page_shift, m);
 	skip = 1 << m;
 	mask = skip - 1;
 	i = 0;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index d5d0077..ab8961c 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -63,6 +63,8 @@
 #define MLX5_IB_DEFAULT_UIDX 0xffffff
 #define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)
 
+#define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size)
+
 enum {
 	MLX5_IB_MMAP_CMD_SHIFT	= 8,
 	MLX5_IB_MMAP_CMD_MASK	= 0xff,
@@ -387,6 +389,7 @@
 	struct list_head	qps_list;
 	struct list_head	cq_recv_list;
 	struct list_head	cq_send_list;
+	u32			rate_limit;
 };
 
 struct mlx5_ib_cq_buf {
@@ -418,7 +421,7 @@
 	struct ib_pd		       *pd;
 	unsigned int			page_shift;
 	unsigned int			npages;
-	u32				length;
+	u64				length;
 	int				access_flags;
 	u32				mkey;
 };
@@ -737,7 +740,8 @@
 int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
 		 u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
 		 const void *in_mad, void *response_mad);
-struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+				struct ib_udata *udata);
 int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
 int mlx5_ib_destroy_ah(struct ib_ah *ah);
 struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
@@ -823,7 +827,9 @@
 		       struct ib_port_attr *props);
 int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev);
 void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
-void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
+void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
+			unsigned long max_page_shift,
+			int *count, int *shift,
 			int *ncont, int *order);
 void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 			    int page_shift, size_t offset, size_t num_pages,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index d4ad672..67985c6 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -627,7 +627,8 @@
 		ent->order = i + 2;
 		ent->dev = dev;
 
-		if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE)
+		if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
+		    (mlx5_core_is_pf(dev->mdev)))
 			limit = dev->mdev->profile->mr_cache[i].limit;
 		else
 			limit = 0;
@@ -645,6 +646,33 @@
 	return 0;
 }
 
+/* Wait (bounded) for pending MR cache requests to drain, then log result. */
+static void wait_for_async_commands(struct mlx5_ib_dev *dev)
+{
+	struct mlx5_mr_cache *cache = &dev->cache;
+	int retries;
+	int total = 0;
+	int i;
+
+	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+		/* at most 1000 sleeps of 50 ms for each cache entry */
+		retries = 1000;
+		while (cache->ent[i].pending && retries--)
+			msleep(50);
+	}
+
+	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
+		total += cache->ent[i].pending;
+
+	if (!total) {
+		mlx5_ib_warn(dev, "done with all pending requests\n");
+		return;
+	}
+
+	mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n",
+		     total);
+}
+
 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
 {
 	int i;
@@ -658,6 +686,7 @@
 		clean_keys(dev, i);
 
 	destroy_workqueue(dev->cache.wq);
+	wait_for_async_commands(dev);
 	del_timer_sync(&dev->delay_timer);
 
 	return 0;
@@ -815,29 +844,34 @@
 	umrwr->mkey = key;
 }
 
-static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
-				   int access_flags, int *npages,
-				   int *page_shift, int *ncont, int *order)
+/* Get the umem for [start, start + length) into *umem; returns 0 or -errno. */
+static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
+		       int access_flags, struct ib_umem **umem, int *npages,
+		       int *page_shift, int *ncont, int *order)
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
-	struct ib_umem *umem = ib_umem_get(pd->uobject->context, start, length,
-					   access_flags, 0);
-	if (IS_ERR(umem)) {
+	int err;
+
+	*umem = ib_umem_get(pd->uobject->context, start, length,
+			    access_flags, 0);
+	err = PTR_ERR_OR_ZERO(*umem);
+	if (err < 0) {
-		mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
+		mlx5_ib_err(dev, "umem get failed (%d)\n", err);
-		return (void *)umem;
+		return err;
 	}
 
-	mlx5_ib_cont_pages(umem, start, npages, page_shift, ncont, order);
+	mlx5_ib_cont_pages(*umem, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
+			   page_shift, ncont, order);
 	if (!*npages) {
 		mlx5_ib_warn(dev, "avoid zero region\n");
-		ib_umem_release(umem);
-		return ERR_PTR(-EINVAL);
+		ib_umem_release(*umem);
+		return -EINVAL;
 	}
 
 	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
 		    *npages, *ncont, *order, *page_shift);
 
-	return umem;
+	return 0;
 }
 
 static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
@@ -1163,11 +1197,11 @@
 
 	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
 		    start, virt_addr, length, access_flags);
-	umem = mr_umem_get(pd, start, length, access_flags, &npages,
+	err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
 			   &page_shift, &ncont, &order);
 
-	if (IS_ERR(umem))
-		return (void *)umem;
+        if (err < 0)
+		return ERR_PTR(err);
 
 	if (use_umr(order)) {
 		mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
@@ -1341,10 +1375,9 @@
 		 */
 		flags |= IB_MR_REREG_TRANS;
 		ib_umem_release(mr->umem);
-		mr->umem = mr_umem_get(pd, addr, len, access_flags, &npages,
-				       &page_shift, &ncont, &order);
-		if (IS_ERR(mr->umem)) {
-			err = PTR_ERR(mr->umem);
+		err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
+				  &npages, &page_shift, &ncont, &order);
+		if (err < 0) {
 			mr->umem = NULL;
 			return err;
 		}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 41f4c2a..cc24f2d 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -78,12 +78,14 @@
 
 enum raw_qp_set_mask_map {
 	MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID		= 1UL << 0,
+	MLX5_RAW_QP_RATE_LIMIT			= 1UL << 1,
 };
 
 struct mlx5_modify_raw_qp_param {
 	u16 operation;
 
 	u32 set_mask; /* raw_qp_set_mask_map */
+	u32 rate_limit;
 	u8 rq_q_ctr_id;
 };
 
@@ -352,6 +354,29 @@
 		return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
 }
 
+/* Data segments that fit a send WQE of wqe_size for this QP type. */
+static int get_send_sge(struct ib_qp_init_attr *attr, int wqe_size)
+{
+	const size_t dseg = sizeof(struct mlx5_wqe_data_seg);
+	size_t hdr = sizeof(struct mlx5_wqe_ctrl_seg);
+	int max_sge;
+
+	switch (attr->qp_type) {
+	case IB_QPT_XRC_INI:
+		hdr += sizeof(struct mlx5_wqe_xrc_seg);
+		/* fall through */
+	case IB_QPT_RC:
+		hdr += sizeof(struct mlx5_wqe_raddr_seg);
+		max_sge = (min_t(int, wqe_size, 512) - hdr) / dseg;
+		break;
+	default:
+		max_sge = (wqe_size - sq_overhead(attr)) / dseg;
+		break;
+	}
+	/* NOTE(review): only sq_overhead() is divided below - intended? */
+	return min_t(int, max_sge, wqe_size - (sq_overhead(attr) / dseg));
+}
+
 static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
 			struct mlx5_ib_qp *qp)
 {
@@ -382,13 +407,18 @@
 	wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
 	qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
 	if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) {
-		mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n",
+		mlx5_ib_dbg(dev, "send queue size (%d * %d / %d -> %d) exceeds limits(%d)\n",
+			    attr->cap.max_send_wr, wqe_size, MLX5_SEND_WQE_BB,
 			    qp->sq.wqe_cnt,
 			    1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz));
 		return -ENOMEM;
 	}
 	qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
-	qp->sq.max_gs = attr->cap.max_send_sge;
+	qp->sq.max_gs = get_send_sge(attr, wqe_size);
+	if (qp->sq.max_gs < attr->cap.max_send_sge)
+		return -ENOMEM;
+
+	attr->cap.max_send_sge = qp->sq.max_gs;
 	qp->sq.max_post = wq_size / wqe_size;
 	attr->cap.max_send_wr = qp->sq.max_post;
 
@@ -648,7 +678,7 @@
 		return PTR_ERR(*umem);
 	}
 
-	mlx5_ib_cont_pages(*umem, addr, npages, page_shift, ncont, NULL);
+	mlx5_ib_cont_pages(*umem, addr, 0, npages, page_shift, ncont, NULL);
 
 	err = mlx5_ib_get_buf_offset(addr, *page_shift, offset);
 	if (err) {
@@ -701,7 +731,7 @@
 		return err;
 	}
 
-	mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, &npages, &page_shift,
+	mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &npages, &page_shift,
 			   &ncont, NULL);
 	err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift,
 				     &rwq->rq_page_offset);
@@ -2443,8 +2473,14 @@
 }
 
 static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
-				   struct mlx5_ib_sq *sq, int new_state)
+				   struct mlx5_ib_sq *sq,
+				   int new_state,
+				   const struct mlx5_modify_raw_qp_param *raw_qp_param)
 {
+	struct mlx5_ib_qp *ibqp = sq->base.container_mibqp;
+	u32 old_rate = ibqp->rate_limit;
+	u32 new_rate = old_rate;
+	u16 rl_index = 0;
 	void *in;
 	void *sqc;
 	int inlen;
@@ -2460,10 +2496,44 @@
 	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
 	MLX5_SET(sqc, sqc, state, new_state);
 
-	err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen);
-	if (err)
-		goto out;
+	if (raw_qp_param->set_mask & MLX5_RAW_QP_RATE_LIMIT) {
+		if (new_state != MLX5_SQC_STATE_RDY)
+			pr_warn("%s: Rate limit can only be changed when SQ is moving to RDY\n",
+				__func__);
+		else
+			new_rate = raw_qp_param->rate_limit;
+	}
 
+	if (old_rate != new_rate) {
+		if (new_rate) {
+			err = mlx5_rl_add_rate(dev, new_rate, &rl_index);
+			if (err) {
+				pr_err("Failed configuring rate %u: %d\n",
+				       new_rate, err);
+				goto out;
+			}
+		}
+
+		MLX5_SET64(modify_sq_in, in, modify_bitmask, 1);
+		MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index);
+	}
+
+	err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen);
+	if (err) {
+		/* Remove new rate from table if failed */
+		if (new_rate &&
+		    old_rate != new_rate)
+			mlx5_rl_remove_rate(dev, new_rate);
+		goto out;
+	}
+
+	/* Only remove the old rate after new rate was set */
+	if ((old_rate &&
+	    (old_rate != new_rate)) ||
+	    (new_state != MLX5_SQC_STATE_RDY))
+		mlx5_rl_remove_rate(dev, old_rate);
+
+	ibqp->rate_limit = new_rate;
 	sq->state = new_state;
 
 out:
@@ -2478,6 +2548,8 @@
 	struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
 	struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
 	struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+	int modify_rq = !!qp->rq.wqe_cnt;
+	int modify_sq = !!qp->sq.wqe_cnt;
 	int rq_state;
 	int sq_state;
 	int err;
@@ -2495,10 +2567,18 @@
 		rq_state = MLX5_RQC_STATE_RST;
 		sq_state = MLX5_SQC_STATE_RST;
 		break;
-	case MLX5_CMD_OP_INIT2INIT_QP:
-	case MLX5_CMD_OP_INIT2RTR_QP:
 	case MLX5_CMD_OP_RTR2RTS_QP:
 	case MLX5_CMD_OP_RTS2RTS_QP:
+		if (raw_qp_param->set_mask ==
+		    MLX5_RAW_QP_RATE_LIMIT) {
+			modify_rq = 0;
+			sq_state = sq->state;
+		} else {
+			return raw_qp_param->set_mask ? -EINVAL : 0;
+		}
+		break;
+	case MLX5_CMD_OP_INIT2INIT_QP:
+	case MLX5_CMD_OP_INIT2RTR_QP:
 		if (raw_qp_param->set_mask)
 			return -EINVAL;
 		else
@@ -2508,13 +2588,13 @@
 		return -EINVAL;
 	}
 
-	if (qp->rq.wqe_cnt) {
-		err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param);
+	if (modify_rq) {
+		err =  modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param);
 		if (err)
 			return err;
 	}
 
-	if (qp->sq.wqe_cnt) {
+	if (modify_sq) {
 		if (tx_affinity) {
 			err = modify_raw_packet_tx_affinity(dev->mdev, sq,
 							    tx_affinity);
@@ -2522,7 +2602,7 @@
 				return err;
 		}
 
-		return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state);
+		return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, raw_qp_param);
 	}
 
 	return 0;
@@ -2578,7 +2658,6 @@
 	struct mlx5_ib_port *mibport = NULL;
 	enum mlx5_qp_state mlx5_cur, mlx5_new;
 	enum mlx5_qp_optpar optpar;
-	int sqd_event;
 	int mlx5_st;
 	int err;
 	u16 op;
@@ -2725,12 +2804,6 @@
 	if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
 		context->db_rec_addr = cpu_to_be64(qp->db.dma);
 
-	if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD	&&
-	    attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
-		sqd_event = 1;
-	else
-		sqd_event = 0;
-
 	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
 		u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num :
 			       qp->port) - 1;
@@ -2777,6 +2850,12 @@
 			raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id;
 			raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
 		}
+
+		if (attr_mask & IB_QP_RATE_LIMIT) {
+			raw_qp_param.rate_limit = attr->rate_limit;
+			raw_qp_param.set_mask |= MLX5_RAW_QP_RATE_LIMIT;
+		}
+
 		err = modify_raw_packet_qp(dev, qp, &raw_qp_param, tx_affinity);
 	} else {
 		err = mlx5_core_qp_modify(dev->mdev, op, optpar, context,
@@ -3068,10 +3147,10 @@
 {
 	memset(umr, 0, sizeof(*umr));
 	umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
-	umr->flags = 1 << 7;
+	umr->flags = MLX5_UMR_INLINE;
 }
 
-static __be64 get_umr_reg_mr_mask(void)
+static __be64 get_umr_reg_mr_mask(int atomic)
 {
 	u64 result;
 
@@ -3084,9 +3163,11 @@
 		 MLX5_MKEY_MASK_KEY		|
 		 MLX5_MKEY_MASK_RR		|
 		 MLX5_MKEY_MASK_RW		|
-		 MLX5_MKEY_MASK_A		|
 		 MLX5_MKEY_MASK_FREE;
 
+	if (atomic)
+		result |= MLX5_MKEY_MASK_A;
+
 	return cpu_to_be64(result);
 }
 
@@ -3147,7 +3228,7 @@
 }
 
 static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
-				struct ib_send_wr *wr)
+				struct ib_send_wr *wr, int atomic)
 {
 	struct mlx5_umr_wr *umrwr = umr_wr(wr);
 
@@ -3172,7 +3253,7 @@
 		if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD)
 			umr->mkey_mask |= get_umr_update_pd_mask();
 		if (!umr->mkey_mask)
-			umr->mkey_mask = get_umr_reg_mr_mask();
+			umr->mkey_mask = get_umr_reg_mr_mask(atomic);
 	} else {
 		umr->mkey_mask = get_umr_unreg_mr_mask();
 	}
@@ -4025,7 +4106,7 @@
 			}
 			qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
 			ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey);
-			set_reg_umr_segment(seg, wr);
+			set_reg_umr_segment(seg, wr, !!(MLX5_CAP_GEN(mdev, atomic)));
 			seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
 			size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
 			if (unlikely((seg == qend)))
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 3857dbd..6f4397e 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -118,7 +118,7 @@
 		return err;
 	}
 
-	mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, &npages,
+	mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &npages,
 			   &page_shift, &ncont, NULL);
 	err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift,
 				     &offset);
@@ -203,8 +203,6 @@
 
 	srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL);
 	if (!srq->wrid) {
-		mlx5_ib_dbg(dev, "kmalloc failed %lu\n",
-			    (unsigned long)(srq->msrq.max * sizeof(u64)));
 		err = -ENOMEM;
 		goto err_in;
 	}
@@ -282,6 +280,7 @@
 	mlx5_ib_dbg(dev, "desc_size 0x%x, req wr 0x%x, srq size 0x%x, max_gs 0x%x, max_avail_gather 0x%x\n",
 		    desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
 		    srq->msrq.max_avail_gather);
+	in.type = init_attr->srq_type;
 
 	if (pd->uobject)
 		err = create_srq_user(pd, srq, &in, udata, buf_size);
@@ -294,7 +293,6 @@
 		goto err_srq;
 	}
 
-	in.type = init_attr->srq_type;
 	in.log_size = ilog2(srq->msrq.max);
 	in.wqe_shift = srq->msrq.wqe_shift - 4;
 	if (srq->wq_sig)
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index bcac294..c9f0f36 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -186,8 +186,8 @@
 
 on_hca_fail:
 	if (ah->type == MTHCA_AH_PCI_POOL) {
-		ah->av = pci_pool_alloc(dev->av_table.pool,
-					GFP_ATOMIC, &ah->avdma);
+		ah->av = pci_pool_zalloc(dev->av_table.pool,
+					 GFP_ATOMIC, &ah->avdma);
 		if (!ah->av)
 			return -ENOMEM;
 
@@ -196,8 +196,6 @@
 
 	ah->key = pd->ntmr.ibmr.lkey;
 
-	memset(av, 0, MTHCA_AV_SIZE);
-
 	av->port_pd = cpu_to_be32(pd->pd_num | (ah_attr->port_num << 24));
 	av->g_slid  = ah_attr->src_path_bits;
 	av->dlid    = cpu_to_be16(ah_attr->dlid);
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 358930a4..d317087 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -410,7 +410,9 @@
 }
 
 static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
-				     struct ib_ah_attr *ah_attr)
+				     struct ib_ah_attr *ah_attr,
+				     struct ib_udata *udata)
+
 {
 	int err;
 	struct mthca_ah *ah;
diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c
index 6727af2..2a6979e 100644
--- a/drivers/infiniband/hw/mthca/mthca_reset.c
+++ b/drivers/infiniband/hw/mthca/mthca_reset.c
@@ -96,8 +96,6 @@
 	hca_header = kmalloc(256, GFP_KERNEL);
 	if (!hca_header) {
 		err = -ENOMEM;
-		mthca_err(mdev, "Couldn't allocate memory to save HCA "
-			  "PCI header, aborting.\n");
 		goto put_dev;
 	}
 
@@ -119,8 +117,6 @@
 		bridge_header = kmalloc(256, GFP_KERNEL);
 		if (!bridge_header) {
 			err = -ENOMEM;
-			mthca_err(mdev, "Couldn't allocate memory to save HCA "
-				  "bridge PCI header, aborting.\n");
 			goto free_hca;
 		}
 
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 35cbb17..9badd02 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -516,7 +516,6 @@
 	/* Allocate hardware structure */
 	nesdev = kzalloc(sizeof(struct nes_device), GFP_KERNEL);
 	if (!nesdev) {
-		printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n", pci_name(pcidev));
 		ret = -ENOMEM;
 		goto bail2;
 	}
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 57db9b3..8e70347 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -2282,10 +2282,8 @@
 	if (!listener) {
 		/* create a CM listen node (1/2 node to compare incoming traffic to) */
 		listener = kzalloc(sizeof(*listener), GFP_ATOMIC);
-		if (!listener) {
-			nes_debug(NES_DBG_CM, "Not creating listener memory allocation failed\n");
+		if (!listener)
 			return NULL;
-		}
 
 		listener->loc_addr = cm_info->loc_addr;
 		listener->loc_port = cm_info->loc_port;
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index a1c6481..19acd13 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -351,9 +351,8 @@
 
 	/* allocate a new adapter struct */
 	nesadapter = kzalloc(adapter_size, GFP_KERNEL);
-	if (nesadapter == NULL) {
+	if (!nesadapter)
 		return NULL;
-	}
 
 	nes_debug(NES_DBG_INIT, "Allocating new nesadapter @ %p, size = %u (actual size = %u).\n",
 			nesadapter, (u32)sizeof(struct nes_adapter), adapter_size);
@@ -1007,8 +1006,7 @@
 	/* Allocate a twice the number of CQP requests as the SQ size */
 	nesdev->nes_cqp_requests = kzalloc(sizeof(struct nes_cqp_request) *
 			2 * NES_CQP_SQ_SIZE, GFP_KERNEL);
-	if (nesdev->nes_cqp_requests == NULL) {
-		nes_debug(NES_DBG_INIT, "Unable to allocate memory CQP request entries.\n");
+	if (!nesdev->nes_cqp_requests) {
 		pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, nesdev->cqp.sq_vbase,
 				nesdev->cqp.sq_pbase);
 		return -ENOMEM;
diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c
index 4166452..33624f1 100644
--- a/drivers/infiniband/hw/nes/nes_mgt.c
+++ b/drivers/infiniband/hw/nes/nes_mgt.c
@@ -320,8 +320,7 @@
 
 	/* Found one */
 	fpdu_info = kzalloc(sizeof(*fpdu_info), GFP_ATOMIC);
-	if (fpdu_info == NULL) {
-		nes_debug(NES_DBG_PAU, "Failed to alloc a fpdu_info.\n");
+	if (!fpdu_info) {
 		rc = -ENOMEM;
 		goto out;
 	}
@@ -729,8 +728,7 @@
 	}
 
 	qh_chg = kmalloc(sizeof *qh_chg, GFP_ATOMIC);
-	if (qh_chg == NULL) {
-		nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n");
+	if (!qh_chg) {
 		ret = -ENOMEM;
 		goto chg_qh_err;
 	}
@@ -880,10 +878,8 @@
 
 	/* Allocate space the all mgt QPs once */
 	mgtvnic = kzalloc(NES_MGT_QP_COUNT * sizeof(struct nes_vnic_mgt), GFP_KERNEL);
-	if (mgtvnic == NULL) {
-		nes_debug(NES_DBG_INIT, "Unable to allocate memory for mgt structure\n");
+	if (!mgtvnic)
 		return -ENOMEM;
-	}
 
 	/* Allocate fragment, RQ, and CQ; Reuse CEQ based on the PCI function */
 	/* We are not sending from this NIC so sq is not allocated */
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index bd69125..aff9fb1 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -771,7 +771,8 @@
 /**
  * nes_create_ah
  */
-static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+				   struct ib_udata *udata)
 {
 	return ERR_PTR(-ENOSYS);
 }
@@ -1075,7 +1076,6 @@
 			mem = kzalloc(sizeof(*nesqp)+NES_SW_CONTEXT_ALIGN-1, GFP_KERNEL);
 			if (!mem) {
 				nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
-				nes_debug(NES_DBG_QP, "Unable to allocate QP\n");
 				return ERR_PTR(-ENOMEM);
 			}
 			u64nesqp = (unsigned long)mem;
@@ -1475,7 +1475,6 @@
 	nescq = kzalloc(sizeof(struct nes_cq), GFP_KERNEL);
 	if (!nescq) {
 		nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
-		nes_debug(NES_DBG_CQ, "Unable to allocate nes_cq struct\n");
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -2408,7 +2407,6 @@
 			}
 			nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL);
 			if (!nespbl) {
-				nes_debug(NES_DBG_MR, "Unable to allocate PBL\n");
 				ib_umem_release(region);
 				return ERR_PTR(-ENOMEM);
 			}
@@ -2416,7 +2414,6 @@
 			if (!nesmr) {
 				ib_umem_release(region);
 				kfree(nespbl);
-				nes_debug(NES_DBG_MR, "Unable to allocate nesmr\n");
 				return ERR_PTR(-ENOMEM);
 			}
 			nesmr->region = region;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 797362a..14d33b0 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -154,7 +154,8 @@
 	return status;
 }
 
-struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
+struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
+			       struct ib_udata *udata)
 {
 	u32 *ahid_addr;
 	int status;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
index 3856dd4..0704a24 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
@@ -50,7 +50,9 @@
 	OCRDMA_AH_L3_TYPE_MASK		= 0x03,
 	OCRDMA_AH_L3_TYPE_SHIFT		= 0x1D /* 29 bits */
 };
-struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *);
+
+struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *,
+			       struct ib_udata *);
 int ocrdma_destroy_ah(struct ib_ah *);
 int ocrdma_query_ah(struct ib_ah *, struct ib_ah_attr *);
 int ocrdma_modify_ah(struct ib_ah *, struct ib_ah_attr *);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 6876a71..9a30520 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -1596,10 +1596,9 @@
 
 	dev->pd_mgr = kzalloc(sizeof(struct ocrdma_pd_resource_mgr),
 			      GFP_KERNEL);
-	if (!dev->pd_mgr) {
-		pr_err("%s(%d)Memory allocation failure.\n", __func__, dev->id);
+	if (!dev->pd_mgr)
 		return;
-	}
+
 	status = ocrdma_mbx_alloc_pd_range(dev);
 	if (status) {
 		pr_err("%s(%d) Unable to initialize PD pool, using default.\n",
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index 8bef09a..f8e4b0a 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -84,10 +84,8 @@
 
 	/* Alloc debugfs mem */
 	mem->debugfs_mem = kzalloc(OCRDMA_MAX_DBGFS_MEM, GFP_KERNEL);
-	if (!mem->debugfs_mem) {
-		pr_err("%s: stats debugfs mem allocation failed\n", __func__);
+	if (!mem->debugfs_mem)
 		return false;
-	}
 
 	return true;
 }
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index a615142..302fb05 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -511,8 +511,10 @@
 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
 	struct qedr_pd *pd = get_qedr_pd(ibpd);
 
-	if (!pd)
+	if (!pd) {
 		pr_err("Invalid PD received in dealloc_pd\n");
+		return -EINVAL;
+	}
 
 	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
 	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
@@ -1477,6 +1479,7 @@
 	struct qedr_ucontext *ctx = NULL;
 	struct qedr_create_qp_ureq ureq;
 	struct qedr_qp *qp;
+	struct ib_qp *ibqp;
 	int rc = 0;
 
 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
@@ -1486,13 +1489,13 @@
 	if (rc)
 		return ERR_PTR(rc);
 
+	if (attrs->srq)
+		return ERR_PTR(-EINVAL);
+
 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
 	if (!qp)
 		return ERR_PTR(-ENOMEM);
 
-	if (attrs->srq)
-		return ERR_PTR(-EINVAL);
-
 	DP_DEBUG(dev, QEDR_MSG_QP,
 		 "create qp: sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
 		 get_qedr_cq(attrs->send_cq),
@@ -1508,7 +1511,10 @@
 			       "create qp: unexpected udata when creating GSI QP\n");
 			goto err0;
 		}
-		return qedr_create_gsi_qp(dev, attrs, qp);
+		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
+		if (IS_ERR(ibqp))
+			kfree(qp);
+		return ibqp;
 	}
 
 	memset(&in_params, 0, sizeof(in_params));
@@ -2094,7 +2100,8 @@
 	return rc;
 }
 
-struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
+struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
+			     struct ib_udata *udata)
 {
 	struct qedr_ah *ah;
 
@@ -2413,8 +2420,7 @@
 		 */
 		pbl = list_first_entry(&info->inuse_pbl_list,
 				       struct qedr_pbl, list_entry);
-		list_del(&pbl->list_entry);
-		list_add_tail(&pbl->list_entry, &info->free_pbl_list);
+		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
 		info->completed_handled++;
 	}
 }
@@ -2981,11 +2987,6 @@
 		return -EINVAL;
 	}
 
-	if (!wr) {
-		DP_ERR(dev, "Got an empty post send.\n");
-		return -EINVAL;
-	}
-
 	while (wr) {
 		rc = __qedr_post_send(ibqp, wr, bad_wr);
 		if (rc)
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index a9b5e67..070677c 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -70,7 +70,8 @@
 		  int qp_attr_mask, struct ib_qp_init_attr *);
 int qedr_destroy_qp(struct ib_qp *ibqp);
 
-struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr);
+struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
+			     struct ib_udata *udata);
 int qedr_destroy_ah(struct ib_ah *ibah);
 
 int qedr_dereg_mr(struct ib_mr *);
diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c
index 8c34b23..775018b 100644
--- a/drivers/infiniband/hw/qib/qib_diag.c
+++ b/drivers/infiniband/hw/qib/qib_diag.c
@@ -609,8 +609,6 @@
 
 	tmpbuf = vmalloc(plen);
 	if (!tmpbuf) {
-		qib_devinfo(dd->pcidev,
-			"Unable to allocate tmp buffer, failing\n");
 		ret = -ENOMEM;
 		goto bail;
 	}
@@ -702,10 +700,8 @@
 	if (!dd || !op)
 		return -EINVAL;
 	olp = vmalloc(sizeof(*olp));
-	if (!olp) {
-		pr_err("vmalloc for observer failed\n");
+	if (!olp)
 		return -ENOMEM;
-	}
 
 	spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
 	olp->op = op;
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 728e0a0..2b5982f 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -420,8 +420,7 @@
 						if (list_empty(&qp->rspwait)) {
 							qp->r_flags |=
 								RVT_R_RSP_NAK;
-							atomic_inc(
-								&qp->refcount);
+							rvt_get_qp(qp);
 							list_add_tail(
 							 &qp->rspwait,
 							 &rcd->qp_wait_list);
diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c
index 311ee6c..33a2e74 100644
--- a/drivers/infiniband/hw/qib/qib_eeprom.c
+++ b/drivers/infiniband/hw/qib/qib_eeprom.c
@@ -182,12 +182,8 @@
 	 * */
 	len = sizeof(struct qib_flash);
 	buf = vmalloc(len);
-	if (!buf) {
-		qib_dev_err(dd,
-			"Couldn't allocate memory to read %u bytes from eeprom for GUID\n",
-			len);
+	if (!buf)
 		goto bail;
-	}
 
 	/*
 	 * Use "public" eeprom read function, which does locking and
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 382466a..2d1eacf 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -2066,8 +2066,11 @@
 	ssize_t ret = 0;
 	void *dest;
 
-	if (WARN_ON_ONCE(!ib_safe_file_access(fp)))
+	if (!ib_safe_file_access(fp)) {
+		pr_err_once("qib_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
+			    task_tgid_vnr(current), current->comm);
 		return -EACCES;
+	}
 
 	if (count < sizeof(cmd.type)) {
 		ret = -EINVAL;
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index a3733f2..92399d3 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -1759,9 +1759,7 @@
 	}
 	namelen = strlen(n) + 1;
 	dd->boardname = kmalloc(namelen, GFP_KERNEL);
-	if (!dd->boardname)
-		qib_dev_err(dd, "Failed allocation for board name: %s\n", n);
-	else
+	if (dd->boardname)
 		snprintf(dd->boardname, namelen, "%s", n);
 
 	if (dd->majrev != 4 || !dd->minrev || dd->minrev > 2)
@@ -2533,8 +2531,6 @@
 		dd->cspec->cntrnamelen = 1 + s - cntr6120names;
 	dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs
 		* sizeof(u64), GFP_KERNEL);
-	if (!dd->cspec->cntrs)
-		qib_dev_err(dd, "Failed allocation for counters\n");
 
 	for (i = 0, s = (char *)portcntr6120names; s; i++)
 		s = strchr(s + 1, '\n');
@@ -2542,8 +2538,6 @@
 	dd->cspec->portcntrnamelen = sizeof(portcntr6120names) - 1;
 	dd->cspec->portcntrs = kmalloc(dd->cspec->nportcntrs
 		* sizeof(u64), GFP_KERNEL);
-	if (!dd->cspec->portcntrs)
-		qib_dev_err(dd, "Failed allocation for portcounters\n");
 }
 
 static u32 qib_read_6120cntrs(struct qib_devdata *dd, loff_t pos, char **namep,
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 00b2af2..e55e31a 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -2070,9 +2070,7 @@
 
 	namelen = strlen(n) + 1;
 	dd->boardname = kmalloc(namelen, GFP_KERNEL);
-	if (!dd->boardname)
-		qib_dev_err(dd, "Failed allocation for board name: %s\n", n);
-	else
+	if (dd->boardname)
 		snprintf(dd->boardname, namelen, "%s", n);
 
 	if (dd->majrev != 5 || !dd->minrev || dd->minrev > 2)
@@ -3179,8 +3177,6 @@
 		dd->cspec->cntrnamelen = 1 + s - cntr7220names;
 	dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs
 		* sizeof(u64), GFP_KERNEL);
-	if (!dd->cspec->cntrs)
-		qib_dev_err(dd, "Failed allocation for counters\n");
 
 	for (i = 0, s = (char *)portcntr7220names; s; i++)
 		s = strchr(s + 1, '\n');
@@ -3188,8 +3184,6 @@
 	dd->cspec->portcntrnamelen = sizeof(portcntr7220names) - 1;
 	dd->cspec->portcntrs = kmalloc(dd->cspec->nportcntrs
 		* sizeof(u64), GFP_KERNEL);
-	if (!dd->cspec->portcntrs)
-		qib_dev_err(dd, "Failed allocation for portcounters\n");
 }
 
 static u32 qib_read_7220cntrs(struct qib_devdata *dd, loff_t pos, char **namep,
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index ded2717..c4a36160 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -3627,9 +3627,7 @@
 
 	namelen = strlen(n) + 1;
 	dd->boardname = kmalloc(namelen, GFP_KERNEL);
-	if (!dd->boardname)
-		qib_dev_err(dd, "Failed allocation for board name: %s\n", n);
-	else
+	if (dd->boardname)
 		snprintf(dd->boardname, namelen, "%s", n);
 
 	snprintf(dd->boardversion, sizeof(dd->boardversion),
@@ -3656,7 +3654,7 @@
 static int qib_do_7322_reset(struct qib_devdata *dd)
 {
 	u64 val;
-	u64 *msix_vecsave;
+	u64 *msix_vecsave = NULL;
 	int i, msix_entries, ret = 1;
 	u16 cmdval;
 	u8 int_line, clinesz;
@@ -3677,10 +3675,7 @@
 		/* can be up to 512 bytes, too big for stack */
 		msix_vecsave = kmalloc(2 * dd->cspec->num_msix_entries *
 			sizeof(u64), GFP_KERNEL);
-		if (!msix_vecsave)
-			qib_dev_err(dd, "No mem to save MSIx data\n");
-	} else
-		msix_vecsave = NULL;
+	}
 
 	/*
 	 * Core PCI (as of 2.6.18) doesn't save or rewrite the full vector
@@ -5043,8 +5038,6 @@
 		dd->cspec->cntrnamelen = 1 + s - cntr7322names;
 	dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs
 		* sizeof(u64), GFP_KERNEL);
-	if (!dd->cspec->cntrs)
-		qib_dev_err(dd, "Failed allocation for counters\n");
 
 	for (i = 0, s = (char *)portcntr7322names; s; i++)
 		s = strchr(s + 1, '\n');
@@ -5053,9 +5046,6 @@
 	for (i = 0; i < dd->num_pports; ++i) {
 		dd->pport[i].cpspec->portcntrs = kmalloc(dd->cspec->nportcntrs
 			* sizeof(u64), GFP_KERNEL);
-		if (!dd->pport[i].cpspec->portcntrs)
-			qib_dev_err(dd,
-				"Failed allocation for portcounters\n");
 	}
 }
 
@@ -6461,7 +6451,6 @@
 		sizeof(*dd->cspec->sendibchk), GFP_KERNEL);
 	if (!dd->cspec->sendchkenable || !dd->cspec->sendgrhchk ||
 		!dd->cspec->sendibchk) {
-		qib_dev_err(dd, "Failed allocation for hdrchk bitmaps\n");
 		ret = -ENOMEM;
 		goto bail;
 	}
@@ -7338,10 +7327,9 @@
 	tabsize = actual_cnt;
 	dd->cspec->msix_entries = kzalloc(tabsize *
 			sizeof(struct qib_msix_entry), GFP_KERNEL);
-	if (!dd->cspec->msix_entries) {
-		qib_dev_err(dd, "No memory for MSIx table\n");
+	if (!dd->cspec->msix_entries)
 		tabsize = 0;
-	}
+
 	for (i = 0; i < tabsize; i++)
 		dd->cspec->msix_entries[i].msix.entry = i;
 
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 1730aa8..b50240b 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -133,11 +133,8 @@
 	 * cleanup iterates across all possible ctxts.
 	 */
 	dd->rcd = kcalloc(dd->ctxtcnt, sizeof(*dd->rcd), GFP_KERNEL);
-	if (!dd->rcd) {
-		qib_dev_err(dd,
-			"Unable to allocate ctxtdata array, failing\n");
+	if (!dd->rcd)
 		return -ENOMEM;
-	}
 
 	/* create (one or more) kctxt */
 	for (i = 0; i < dd->first_user_ctxt; ++i) {
@@ -265,39 +262,23 @@
 	size = IB_CC_TABLE_CAP_DEFAULT * sizeof(struct ib_cc_table_entry)
 		* IB_CCT_ENTRIES;
 	ppd->ccti_entries = kzalloc(size, GFP_KERNEL);
-	if (!ppd->ccti_entries) {
-		qib_dev_err(dd,
-		  "failed to allocate congestion control table for port %d!\n",
-		  port);
+	if (!ppd->ccti_entries)
 		goto bail;
-	}
 
 	size = IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry);
 	ppd->congestion_entries = kzalloc(size, GFP_KERNEL);
-	if (!ppd->congestion_entries) {
-		qib_dev_err(dd,
-		 "failed to allocate congestion setting list for port %d!\n",
-		 port);
+	if (!ppd->congestion_entries)
 		goto bail_1;
-	}
 
 	size = sizeof(struct cc_table_shadow);
 	ppd->ccti_entries_shadow = kzalloc(size, GFP_KERNEL);
-	if (!ppd->ccti_entries_shadow) {
-		qib_dev_err(dd,
-		 "failed to allocate shadow ccti list for port %d!\n",
-		 port);
+	if (!ppd->ccti_entries_shadow)
 		goto bail_2;
-	}
 
 	size = sizeof(struct ib_cc_congestion_setting_attr);
 	ppd->congestion_entries_shadow = kzalloc(size, GFP_KERNEL);
-	if (!ppd->congestion_entries_shadow) {
-		qib_dev_err(dd,
-		 "failed to allocate shadow congestion setting list for port %d!\n",
-		 port);
+	if (!ppd->congestion_entries_shadow)
 		goto bail_3;
-	}
 
 	return 0;
 
@@ -391,18 +372,12 @@
 	dma_addr_t *addrs;
 
 	pages = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *));
-	if (!pages) {
-		qib_dev_err(dd,
-			"failed to allocate shadow page * array, no expected sends!\n");
+	if (!pages)
 		goto bail;
-	}
 
 	addrs = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t));
-	if (!addrs) {
-		qib_dev_err(dd,
-			"failed to allocate shadow dma handle array, no expected sends!\n");
+	if (!addrs)
 		goto bail_free;
-	}
 
 	dd->pageshadow = pages;
 	dd->physshadow = addrs;
@@ -1026,11 +1001,8 @@
 	cnt = 1024;
 
 	addr = vmalloc(cnt);
-	if (!addr) {
-		qib_devinfo(dd->pcidev,
-			 "Couldn't get memory for checking PIO perf, skipping\n");
+	if (!addr)
 		goto done;
-	}
 
 	preempt_disable();  /* we want reasonably accurate elapsed time */
 	msecs = 1 + jiffies_to_msecs(jiffies);
@@ -1172,9 +1144,6 @@
 				      sizeof(long), GFP_KERNEL);
 		if (qib_cpulist)
 			qib_cpulist_count = count;
-		else
-			qib_early_err(&pdev->dev,
-				"Could not alloc cpulist info, cpu affinity might be wrong\n");
 	}
 #ifdef CONFIG_DEBUG_FS
 	qib_dbg_ibdev_init(&dd->verbs_dev);
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 2097512..031433c 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -941,8 +941,6 @@
 {
 	struct ib_other_headers *ohdr;
 	struct rvt_swqe *wqe;
-	struct ib_wc wc;
-	unsigned i;
 	u32 opcode;
 	u32 psn;
 
@@ -988,22 +986,8 @@
 		qp->s_last = s_last;
 		/* see post_send() */
 		barrier();
-		for (i = 0; i < wqe->wr.num_sge; i++) {
-			struct rvt_sge *sge = &wqe->sg_list[i];
-
-			rvt_put_mr(sge->mr);
-		}
-		/* Post a send completion queue entry if requested. */
-		if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
-		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
-			memset(&wc, 0, sizeof(wc));
-			wc.wr_id = wqe->wr.wr_id;
-			wc.status = IB_WC_SUCCESS;
-			wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
-			wc.byte_len = wqe->length;
-			wc.qp = &qp->ibqp;
-			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
-		}
+		rvt_put_swqe(wqe);
+		rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
 	}
 	/*
 	 * If we were waiting for sends to complete before resending,
@@ -1032,9 +1016,6 @@
 					 struct rvt_swqe *wqe,
 					 struct qib_ibport *ibp)
 {
-	struct ib_wc wc;
-	unsigned i;
-
 	/*
 	 * Don't decrement refcount and don't generate a
 	 * completion if the SWQE is being resent until the send
@@ -1044,28 +1025,14 @@
 	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
 		u32 s_last;
 
-		for (i = 0; i < wqe->wr.num_sge; i++) {
-			struct rvt_sge *sge = &wqe->sg_list[i];
-
-			rvt_put_mr(sge->mr);
-		}
+		rvt_put_swqe(wqe);
 		s_last = qp->s_last;
 		if (++s_last >= qp->s_size)
 			s_last = 0;
 		qp->s_last = s_last;
 		/* see post_send() */
 		barrier();
-		/* Post a send completion queue entry if requested. */
-		if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
-		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
-			memset(&wc, 0, sizeof(wc));
-			wc.wr_id = wqe->wr.wr_id;
-			wc.status = IB_WC_SUCCESS;
-			wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
-			wc.byte_len = wqe->length;
-			wc.qp = &qp->ibqp;
-			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
-		}
+		rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
 	} else
 		this_cpu_inc(*ibp->rvp.rc_delayed_comp);
 
@@ -2112,8 +2079,7 @@
 			 * Update the next expected PSN.  We add 1 later
 			 * below, so only add the remainder here.
 			 */
-			if (len > pmtu)
-				qp->r_psn += (len - 1) / pmtu;
+			qp->r_psn += rvt_div_mtu(qp, len - 1);
 		} else {
 			e->rdma_sge.mr = NULL;
 			e->rdma_sge.vaddr = NULL;
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index de1bde5..e54a2fe 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -793,7 +793,6 @@
 		       enum ib_wc_status status)
 {
 	u32 old_last, last;
-	unsigned i;
 
 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		return;
@@ -805,32 +804,13 @@
 	qp->s_last = last;
 	/* See post_send() */
 	barrier();
-	for (i = 0; i < wqe->wr.num_sge; i++) {
-		struct rvt_sge *sge = &wqe->sg_list[i];
-
-		rvt_put_mr(sge->mr);
-	}
+	rvt_put_swqe(wqe);
 	if (qp->ibqp.qp_type == IB_QPT_UD ||
 	    qp->ibqp.qp_type == IB_QPT_SMI ||
 	    qp->ibqp.qp_type == IB_QPT_GSI)
 		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
 
-	/* See ch. 11.2.4.1 and 10.7.3.1 */
-	if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
-	    (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
-	    status != IB_WC_SUCCESS) {
-		struct ib_wc wc;
-
-		memset(&wc, 0, sizeof(wc));
-		wc.wr_id = wqe->wr.wr_id;
-		wc.status = status;
-		wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
-		wc.qp = &qp->ibqp;
-		if (status == IB_WC_SUCCESS)
-			wc.byte_len = wqe->length;
-		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
-			     status != IB_WC_SUCCESS);
-	}
+	rvt_qp_swqe_complete(qp, wqe, status);
 
 	if (qp->s_acked == old_last)
 		qp->s_acked = last;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 954f150..4b54c0d 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -114,19 +114,6 @@
 MODULE_PARM_DESC(disable_sma, "Disable the SMA");
 
 /*
- * Translate ib_wr_opcode into ib_wc_opcode.
- */
-const enum ib_wc_opcode ib_qib_wc_opcode[] = {
-	[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
-	[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
-	[IB_WR_SEND] = IB_WC_SEND,
-	[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
-	[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
-	[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
-	[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
-};
-
-/*
  * System image GUID.
  */
 __be64 ib_qib_sys_image_guid;
@@ -464,7 +451,7 @@
 		priv = list_entry(list->next, struct qib_qp_priv, iowait);
 		qp = priv->owner;
 		list_del_init(&priv->iowait);
-		atomic_inc(&qp->refcount);
+		rvt_get_qp(qp);
 		if (!list_empty(list))
 			mod_timer(&dev->mem_timer, jiffies + 1);
 	}
@@ -477,8 +464,7 @@
 			qib_schedule_send(qp);
 		}
 		spin_unlock_irqrestore(&qp->s_lock, flags);
-		if (atomic_dec_and_test(&qp->refcount))
-			wake_up(&qp->wait);
+		rvt_put_qp(qp);
 	}
 }
 
@@ -762,7 +748,7 @@
 				  iowait);
 		qp = priv->owner;
 		list_del_init(&priv->iowait);
-		atomic_inc(&qp->refcount);
+		rvt_get_qp(qp);
 		spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 
 		spin_lock_irqsave(&qp->s_lock, flags);
@@ -772,8 +758,7 @@
 		}
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 
-		if (atomic_dec_and_test(&qp->refcount))
-			wake_up(&qp->wait);
+		rvt_put_qp(qp);
 	} else
 		spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 }
@@ -808,7 +793,7 @@
 			break;
 		avail -= qpp->s_tx->txreq.sg_count;
 		list_del_init(&qpp->iowait);
-		atomic_inc(&qp->refcount);
+		rvt_get_qp(qp);
 		qps[n++] = qp;
 	}
 
@@ -822,8 +807,7 @@
 			qib_schedule_send(qp);
 		}
 		spin_unlock(&qp->s_lock);
-		if (atomic_dec_and_test(&qp->refcount))
-			wake_up(&qp->wait);
+		rvt_put_qp(qp);
 	}
 }
 
@@ -1288,7 +1272,7 @@
 		priv = list_entry(list->next, struct qib_qp_priv, iowait);
 		qp = priv->owner;
 		list_del_init(&priv->iowait);
-		atomic_inc(&qp->refcount);
+		rvt_get_qp(qp);
 		qps[n++] = qp;
 	}
 	dd->f_wantpiobuf_intr(dd, 0);
@@ -1306,8 +1290,7 @@
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 
 		/* Notify qib_destroy_qp() if it is waiting. */
-		if (atomic_dec_and_test(&qp->refcount))
-			wake_up(&qp->wait);
+		rvt_put_qp(qp);
 	}
 }
 
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
index 2b1a381..092d4e1 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
@@ -228,8 +228,6 @@
 
 	flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction);
 	if (IS_ERR_OR_NULL(flow)) {
-		usnic_err("Unable to alloc flow failed with err %ld\n",
-				PTR_ERR(flow));
 		err = flow ? PTR_ERR(flow) : -EFAULT;
 		goto out_unreserve_port;
 	}
@@ -303,8 +301,6 @@
 
 	flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction);
 	if (IS_ERR_OR_NULL(flow)) {
-		usnic_err("Unable to alloc flow failed with err %ld\n",
-				PTR_ERR(flow));
 		err = flow ? PTR_ERR(flow) : -EFAULT;
 		goto out_put_sock;
 	}
@@ -694,18 +690,14 @@
 	}
 
 	qp_grp = kzalloc(sizeof(*qp_grp), GFP_ATOMIC);
-	if (!qp_grp) {
-		usnic_err("Unable to alloc qp_grp - Out of memory\n");
+	if (!qp_grp)
 		return NULL;
-	}
 
 	qp_grp->res_chunk_list = alloc_res_chunk_list(vf->vnic, res_spec,
 							qp_grp);
 	if (IS_ERR_OR_NULL(qp_grp->res_chunk_list)) {
 		err = qp_grp->res_chunk_list ?
 				PTR_ERR(qp_grp->res_chunk_list) : -ENOMEM;
-		usnic_err("Unable to alloc res for %d with err %d\n",
-				qp_grp->grp_id, err);
 		goto out_free_qp_grp;
 	}
 
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 79766db..74819a7 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -738,7 +738,9 @@
 
 /* In ib callbacks section -  Start of stub funcs */
 struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
-					struct ib_ah_attr *ah_attr)
+				 struct ib_ah_attr *ah_attr,
+				 struct ib_udata *udata)
+
 {
 	usnic_dbg("\n");
 	return ERR_PTR(-EPERM);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
index 0d9d2e6a..0ed8e07 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -75,7 +75,9 @@
 int usnic_ib_mmap(struct ib_ucontext *context,
 			struct vm_area_struct *vma);
 struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
-					struct ib_ah_attr *ah_attr);
+				 struct ib_ah_attr *ah_attr,
+				 struct ib_udata *udata);
+
 int usnic_ib_destroy_ah(struct ib_ah *ah);
 int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			struct ib_send_wr **bad_wr);
diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.c b/drivers/infiniband/hw/usnic/usnic_vnic.c
index 8875107..e7b0030 100644
--- a/drivers/infiniband/hw/usnic/usnic_vnic.c
+++ b/drivers/infiniband/hw/usnic/usnic_vnic.c
@@ -241,17 +241,12 @@
 		return ERR_PTR(-EINVAL);
 
 	ret = kzalloc(sizeof(*ret), GFP_ATOMIC);
-	if (!ret) {
-		usnic_err("Failed to allocate chunk for %s - Out of memory\n",
-				usnic_vnic_pci_name(vnic));
+	if (!ret)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	if (cnt > 0) {
 		ret->res = kcalloc(cnt, sizeof(*(ret->res)), GFP_ATOMIC);
 		if (!ret->res) {
-			usnic_err("Failed to allocate resources for %s. Out of memory\n",
-					usnic_vnic_pci_name(vnic));
 			kfree(ret);
 			return ERR_PTR(-ENOMEM);
 		}
@@ -311,8 +306,10 @@
 	struct usnic_vnic_res *res;
 
 	cnt = vnic_dev_get_res_count(vnic->vdev, _to_vnic_res_type(type));
-	if (cnt < 1)
+	if (cnt < 1) {
+		usnic_err("Wrong res count with cnt %d\n", cnt);
 		return -EINVAL;
+	}
 
 	chunk->cnt = chunk->free_cnt = cnt;
 	chunk->res = kzalloc(sizeof(*(chunk->res))*cnt, GFP_KERNEL);
@@ -384,12 +381,8 @@
 			res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) {
 		err = usnic_vnic_alloc_res_chunk(vnic, res_type,
 						&vnic->chunks[res_type]);
-		if (err) {
-			usnic_err("Failed to alloc res %s with err %d\n",
-					usnic_vnic_res_type_to_str(res_type),
-					err);
+		if (err)
 			goto out_clean_chunks;
-		}
 	}
 
 	return 0;
@@ -454,11 +447,8 @@
 	}
 
 	vnic = kzalloc(sizeof(*vnic), GFP_KERNEL);
-	if (!vnic) {
-		usnic_err("Failed to alloc vnic for %s - out of memory\n",
-				pci_name(pdev));
+	if (!vnic)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	spin_lock_init(&vnic->res_lock);
 
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index 6d9904a..4d0b699 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -119,18 +119,17 @@
 	if (cq->notify == IB_CQ_NEXT_COMP ||
 	    (cq->notify == IB_CQ_SOLICITED &&
 	     (solicited || entry->status != IB_WC_SUCCESS))) {
-		struct kthread_worker *worker;
 		/*
 		 * This will cause send_complete() to be called in
 		 * another thread.
 		 */
-		smp_read_barrier_depends(); /* see rvt_cq_exit */
-		worker = cq->rdi->worker;
-		if (likely(worker)) {
+		spin_lock(&cq->rdi->n_cqs_lock);
+		if (likely(cq->rdi->worker)) {
 			cq->notify = RVT_CQ_NONE;
 			cq->triggered++;
-			kthread_queue_work(worker, &cq->comptask);
+			kthread_queue_work(cq->rdi->worker, &cq->comptask);
 		}
+		spin_unlock(&cq->rdi->n_cqs_lock);
 	}
 
 	spin_unlock_irqrestore(&cq->lock, flags);
@@ -240,15 +239,15 @@
 		}
 	}
 
-	spin_lock(&rdi->n_cqs_lock);
+	spin_lock_irq(&rdi->n_cqs_lock);
 	if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
-		spin_unlock(&rdi->n_cqs_lock);
+		spin_unlock_irq(&rdi->n_cqs_lock);
 		ret = ERR_PTR(-ENOMEM);
 		goto bail_ip;
 	}
 
 	rdi->n_cqs_allocated++;
-	spin_unlock(&rdi->n_cqs_lock);
+	spin_unlock_irq(&rdi->n_cqs_lock);
 
 	if (cq->ip) {
 		spin_lock_irq(&rdi->pending_lock);
@@ -296,9 +295,9 @@
 	struct rvt_dev_info *rdi = cq->rdi;
 
 	kthread_flush_work(&cq->comptask);
-	spin_lock(&rdi->n_cqs_lock);
+	spin_lock_irq(&rdi->n_cqs_lock);
 	rdi->n_cqs_allocated--;
-	spin_unlock(&rdi->n_cqs_lock);
+	spin_unlock_irq(&rdi->n_cqs_lock);
 	if (cq->ip)
 		kref_put(&cq->ip->ref, rvt_release_mmap_info);
 	else
@@ -504,33 +503,23 @@
  */
 int rvt_driver_cq_init(struct rvt_dev_info *rdi)
 {
-	int ret = 0;
 	int cpu;
-	struct task_struct *task;
+	struct kthread_worker *worker;
 
 	if (rdi->worker)
 		return 0;
-	spin_lock_init(&rdi->n_cqs_lock);
-	rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL);
-	if (!rdi->worker)
-		return -ENOMEM;
-	kthread_init_worker(rdi->worker);
-	task = kthread_create_on_node(
-		kthread_worker_fn,
-		rdi->worker,
-		rdi->dparms.node,
-		"%s", rdi->dparms.cq_name);
-	if (IS_ERR(task)) {
-		kfree(rdi->worker);
-		rdi->worker = NULL;
-		return PTR_ERR(task);
-	}
 
-	set_user_nice(task, MIN_NICE);
+	spin_lock_init(&rdi->n_cqs_lock);
+
 	cpu = cpumask_first(cpumask_of_node(rdi->dparms.node));
-	kthread_bind(task, cpu);
-	wake_up_process(task);
-	return ret;
+	worker = kthread_create_worker_on_cpu(cpu, 0,
+					      "%s", rdi->dparms.cq_name);
+	if (IS_ERR(worker))
+		return PTR_ERR(worker);
+
+	set_user_nice(worker->task, MIN_NICE);
+	rdi->worker = worker;
+	return 0;
 }
 
 /**
@@ -541,13 +530,15 @@
 {
 	struct kthread_worker *worker;
 
-	worker = rdi->worker;
-	if (!worker)
+	/* block future queuing from send_complete() */
+	spin_lock_irq(&rdi->n_cqs_lock);
+	worker = rdi->worker;
+	if (!worker) {
+		spin_unlock_irq(&rdi->n_cqs_lock);
 		return;
-	/* blocks future queuing from send_complete() */
+	}
 	rdi->worker = NULL;
-	smp_wmb(); /* See rdi_cq_enter */
-	kthread_flush_worker(worker);
-	kthread_stop(worker->task);
-	kfree(worker);
+	spin_unlock_irq(&rdi->n_cqs_lock);
+
+	kthread_destroy_worker(worker);
 }
diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c
index 983d319..05c8c2a 100644
--- a/drivers/infiniband/sw/rdmavt/mcast.c
+++ b/drivers/infiniband/sw/rdmavt/mcast.c
@@ -81,7 +81,7 @@
 		goto bail;
 
 	mqp->qp = qp;
-	atomic_inc(&qp->refcount);
+	rvt_get_qp(qp);
 
 bail:
 	return mqp;
@@ -92,8 +92,7 @@
 	struct rvt_qp *qp = mqp->qp;
 
 	/* Notify hfi1_destroy_qp() if it is waiting. */
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
+	rvt_put_qp(qp);
 
 	kfree(mqp);
 }
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 46b6497..52fd152 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -51,6 +51,7 @@
 #include <rdma/rdma_vt.h>
 #include "vt.h"
 #include "mr.h"
+#include "trace.h"
 
 /**
  * rvt_driver_mr_init - Init MR resources per driver
@@ -84,6 +85,7 @@
 		lkey_table_size = rdi->dparms.lkey_table_size;
 	}
 	rdi->lkey_table.max = 1 << lkey_table_size;
+	rdi->lkey_table.shift = 32 - lkey_table_size;
 	lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table);
 	rdi->lkey_table.table = (struct rvt_mregion __rcu **)
 			       vmalloc_node(lk_tab_size, rdi->dparms.node);
@@ -402,6 +404,7 @@
 		}
 		mr->mr.map[m]->segs[n].vaddr = vaddr;
 		mr->mr.map[m]->segs[n].length = umem->page_size;
+		trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, umem->page_size);
 		n++;
 		if (n == RVT_SEGSZ) {
 			m++;
@@ -506,6 +509,7 @@
 	n = mapped_segs % RVT_SEGSZ;
 	mr->mr.map[m]->segs[n].vaddr = (void *)addr;
 	mr->mr.map[m]->segs[n].length = ps;
+	trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps);
 	mr->mr.length += ps;
 
 	return 0;
@@ -692,6 +696,7 @@
 	for (i = 0; i < list_len; i++) {
 		fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i];
 		fmr->mr.map[m]->segs[n].length = ps;
+		trace_rvt_mr_fmr_seg(&fmr->mr, m, n, (void *)page_list[i], ps);
 		if (++n == RVT_SEGSZ) {
 			m++;
 			n = 0;
@@ -774,7 +779,6 @@
 	struct rvt_mregion *mr;
 	unsigned n, m;
 	size_t off;
-	struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
 
 	/*
 	 * We use LKEY == zero for kernel virtual addresses
@@ -782,12 +786,14 @@
 	 */
 	rcu_read_lock();
 	if (sge->lkey == 0) {
+		struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
+
 		if (pd->user)
 			goto bail;
 		mr = rcu_dereference(dev->dma_mr);
 		if (!mr)
 			goto bail;
-		atomic_inc(&mr->refcount);
+		rvt_get_mr(mr);
 		rcu_read_unlock();
 
 		isge->mr = mr;
@@ -798,8 +804,7 @@
 		isge->n = 0;
 		goto ok;
 	}
-	mr = rcu_dereference(
-		rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]);
+	mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
 	if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
 		     mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
 		goto bail;
@@ -809,7 +814,7 @@
 		     off + sge->length > mr->length ||
 		     (mr->access_flags & acc) != acc))
 		goto bail;
-	atomic_inc(&mr->refcount);
+	rvt_get_mr(mr);
 	rcu_read_unlock();
 
 	off += mr->offset;
@@ -887,7 +892,7 @@
 		mr = rcu_dereference(rdi->dma_mr);
 		if (!mr)
 			goto bail;
-		atomic_inc(&mr->refcount);
+		rvt_get_mr(mr);
 		rcu_read_unlock();
 
 		sge->mr = mr;
@@ -899,8 +904,7 @@
 		goto ok;
 	}
 
-	mr = rcu_dereference(
-		rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
+	mr = rcu_dereference(rkt->table[rkey >> rkt->shift]);
 	if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
 		     mr->lkey != rkey || qp->ibqp.pd != mr->pd))
 		goto bail;
@@ -909,7 +913,7 @@
 	if (unlikely(vaddr < mr->iova || off + len > mr->length ||
 		     (mr->access_flags & acc) == 0))
 		goto bail;
-	atomic_inc(&mr->refcount);
+	rvt_get_mr(mr);
 	rcu_read_unlock();
 
 	off += mr->offset;
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 6500c3b..2a13ac6 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -76,6 +76,23 @@
 };
 EXPORT_SYMBOL(ib_rvt_state_ops);
 
+/*
+ * Translate ib_wr_opcode into ib_wc_opcode.
+ */
+const enum ib_wc_opcode ib_rvt_wc_opcode[] = {
+	[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
+	[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
+	[IB_WR_SEND] = IB_WC_SEND,
+	[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
+	[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
+	[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
+	[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
+	[IB_WR_SEND_WITH_INV] = IB_WC_SEND,
+	[IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
+	[IB_WR_REG_MR] = IB_WC_REG_MR
+};
+EXPORT_SYMBOL(ib_rvt_wc_opcode);
+
 static void get_map_page(struct rvt_qpn_table *qpt,
 			 struct rvt_qpn_map *map,
 			 gfp_t gfp)
@@ -884,7 +901,8 @@
 	return ret;
 
 bail_ip:
-	kref_put(&qp->ip->ref, rvt_release_mmap_info);
+	if (qp->ip)
+		kref_put(&qp->ip->ref, rvt_release_mmap_info);
 
 bail_qpn:
 	free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h
index 6c0457d..e2d23ac 100644
--- a/drivers/infiniband/sw/rdmavt/trace.h
+++ b/drivers/infiniband/sw/rdmavt/trace.h
@@ -45,143 +45,10 @@
  *
  */
 
-#undef TRACE_SYSTEM_VAR
-#define TRACE_SYSTEM_VAR rdmavt
-
-#if !defined(__RDMAVT_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
-#define __RDMAVT_TRACE_H
-
-#include <linux/tracepoint.h>
-#include <linux/trace_seq.h>
-
-#include <rdma/ib_verbs.h>
-#include <rdma/rdma_vt.h>
-
 #define RDI_DEV_ENTRY(rdi)   __string(dev, rdi->driver_f.get_card_name(rdi))
 #define RDI_DEV_ASSIGN(rdi)  __assign_str(dev, rdi->driver_f.get_card_name(rdi))
 
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM rdmavt
-
-TRACE_EVENT(rvt_dbg,
-	TP_PROTO(struct rvt_dev_info *rdi,
-		 const char *msg),
-	TP_ARGS(rdi, msg),
-	TP_STRUCT__entry(
-		RDI_DEV_ENTRY(rdi)
-		__string(msg, msg)
-	),
-	TP_fast_assign(
-		RDI_DEV_ASSIGN(rdi);
-		__assign_str(msg, msg);
-	),
-	TP_printk("[%s]: %s", __get_str(dev), __get_str(msg))
-);
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM rvt_qphash
-DECLARE_EVENT_CLASS(rvt_qphash_template,
-	TP_PROTO(struct rvt_qp *qp, u32 bucket),
-	TP_ARGS(qp, bucket),
-	TP_STRUCT__entry(
-		RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
-		__field(u32, qpn)
-		__field(u32, bucket)
-	),
-	TP_fast_assign(
-		RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
-		__entry->qpn = qp->ibqp.qp_num;
-		__entry->bucket = bucket;
-	),
-	TP_printk(
-		"[%s] qpn 0x%x bucket %u",
-		__get_str(dev),
-		__entry->qpn,
-		__entry->bucket
-	)
-);
-
-DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert,
-	TP_PROTO(struct rvt_qp *qp, u32 bucket),
-	TP_ARGS(qp, bucket));
-
-DEFINE_EVENT(rvt_qphash_template, rvt_qpremove,
-	TP_PROTO(struct rvt_qp *qp, u32 bucket),
-	TP_ARGS(qp, bucket));
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM rvt_tx
-
-#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode  }
-#define show_wr_opcode(opcode)                             \
-__print_symbolic(opcode,                                   \
-	wr_opcode_name(RDMA_WRITE),                        \
-	wr_opcode_name(RDMA_WRITE_WITH_IMM),               \
-	wr_opcode_name(SEND),                              \
-	wr_opcode_name(SEND_WITH_IMM),                     \
-	wr_opcode_name(RDMA_READ),                         \
-	wr_opcode_name(ATOMIC_CMP_AND_SWP),                \
-	wr_opcode_name(ATOMIC_FETCH_AND_ADD),              \
-	wr_opcode_name(LSO),                               \
-	wr_opcode_name(SEND_WITH_INV),                     \
-	wr_opcode_name(RDMA_READ_WITH_INV),                \
-	wr_opcode_name(LOCAL_INV),                         \
-	wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP),         \
-	wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD))
-
-#define POS_PRN \
-"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u"
-
-TRACE_EVENT(
-	rvt_post_one_wr,
-	TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe),
-	TP_ARGS(qp, wqe),
-	TP_STRUCT__entry(
-		RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
-		__field(u64, wr_id)
-		__field(u32, qpn)
-		__field(u32, psn)
-		__field(u32, lpsn)
-		__field(u32, length)
-		__field(u32, opcode)
-		__field(u32, size)
-		__field(u32, avail)
-		__field(u32, head)
-		__field(u32, last)
-	),
-	TP_fast_assign(
-		RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
-		__entry->wr_id = wqe->wr.wr_id;
-		__entry->qpn = qp->ibqp.qp_num;
-		__entry->psn = wqe->psn;
-		__entry->lpsn = wqe->lpsn;
-		__entry->length = wqe->length;
-		__entry->opcode = wqe->wr.opcode;
-		__entry->size = qp->s_size;
-		__entry->avail = qp->s_avail;
-		__entry->head = qp->s_head;
-		__entry->last = qp->s_last;
-	),
-	TP_printk(
-		POS_PRN,
-		__get_str(dev),
-		__entry->wr_id,
-		__entry->qpn,
-		__entry->psn,
-		__entry->lpsn,
-		__entry->length,
-		__entry->opcode, show_wr_opcode(__entry->opcode),
-		__entry->size,
-		__entry->avail,
-		__entry->head,
-		__entry->last
-	)
-);
-
-#endif /* __RDMAVT_TRACE_H */
-
-#undef TRACE_INCLUDE_PATH
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE trace
-#include <trace/define_trace.h>
+#include "trace_rvt.h"
+#include "trace_qp.h"
+#include "trace_tx.h"
+#include "trace_mr.h"
diff --git a/drivers/infiniband/sw/rdmavt/trace_mr.h b/drivers/infiniband/sw/rdmavt/trace_mr.h
new file mode 100644
index 0000000..3318a6c
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace_mr.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__RVT_TRACE_MR_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVT_TRACE_MR_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_mr.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_mr
+DECLARE_EVENT_CLASS(
+	rvt_mr_template,
+	TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
+	TP_ARGS(mr, m, n, v, len),
+	TP_STRUCT__entry(
+		RDI_DEV_ENTRY(ib_to_rvt(mr->pd->device))
+		__field(void *, vaddr)
+		__field(struct page *, page)
+		__field(size_t, len)
+		__field(u32, lkey)
+		__field(u16, m)
+		__field(u16, n)
+	),
+	TP_fast_assign(
+		RDI_DEV_ASSIGN(ib_to_rvt(mr->pd->device));
+		__entry->vaddr = v;
+		__entry->page = virt_to_page(v);
+		__entry->lkey = mr->lkey;
+		__entry->m = m;
+		__entry->n = n;
+		__entry->len = len;
+	),
+	TP_printk(
+		"[%s] vaddr %p page %p m %u n %u len %zu",
+		__get_str(dev),
+		__entry->vaddr,
+		__entry->page,
+		__entry->m,
+		__entry->n,
+		__entry->len
+	)
+);
+
+DEFINE_EVENT(
+	rvt_mr_template, rvt_mr_page_seg,
+	TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
+	TP_ARGS(mr, m, n, v, len));
+
+DEFINE_EVENT(
+	rvt_mr_template, rvt_mr_fmr_seg,
+	TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
+	TP_ARGS(mr, m, n, v, len));
+
+DEFINE_EVENT(
+	rvt_mr_template, rvt_mr_user_seg,
+	TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
+	TP_ARGS(mr, m, n, v, len));
+
+#endif /* __RVT_TRACE_MR_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_mr
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/sw/rdmavt/trace_qp.h b/drivers/infiniband/sw/rdmavt/trace_qp.h
new file mode 100644
index 0000000..4c77a31
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace_qp.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__RVT_TRACE_QP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVT_TRACE_QP_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_vt.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_qp
+
+DECLARE_EVENT_CLASS(rvt_qphash_template,
+	TP_PROTO(struct rvt_qp *qp, u32 bucket),
+	TP_ARGS(qp, bucket),
+	TP_STRUCT__entry(
+		RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
+		__field(u32, qpn)
+		__field(u32, bucket)
+	),
+	TP_fast_assign(
+		RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device));
+		__entry->qpn = qp->ibqp.qp_num;
+		__entry->bucket = bucket;
+	),
+	TP_printk(
+		"[%s] qpn 0x%x bucket %u",
+		__get_str(dev),
+		__entry->qpn,
+		__entry->bucket
+	)
+);
+
+DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert,
+	TP_PROTO(struct rvt_qp *qp, u32 bucket),
+	TP_ARGS(qp, bucket));
+
+DEFINE_EVENT(rvt_qphash_template, rvt_qpremove,
+	TP_PROTO(struct rvt_qp *qp, u32 bucket),
+	TP_ARGS(qp, bucket));
+
+
+#endif /* __RVT_TRACE_QP_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_qp
+#include <trace/define_trace.h>
+
diff --git a/drivers/infiniband/sw/rdmavt/trace_rvt.h b/drivers/infiniband/sw/rdmavt/trace_rvt.h
new file mode 100644
index 0000000..746f334
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace_rvt.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__RVT_TRACE_RVT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVT_TRACE_RVT_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_vt.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt
+
+TRACE_EVENT(rvt_dbg,
+	TP_PROTO(struct rvt_dev_info *rdi,
+		 const char *msg),
+	TP_ARGS(rdi, msg),
+	TP_STRUCT__entry(
+		RDI_DEV_ENTRY(rdi)
+		__string(msg, msg)
+	),
+	TP_fast_assign(
+		RDI_DEV_ASSIGN(rdi);
+		__assign_str(msg, msg);
+	),
+	TP_printk("[%s]: %s", __get_str(dev), __get_str(msg))
+);
+
+#endif /* __RVT_TRACE_RVT_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_rvt
+#include <trace/define_trace.h>
+
diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h
new file mode 100644
index 0000000..0e03173
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace_tx.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__RVT_TRACE_TX_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVT_TRACE_TX_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_vt.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_tx
+
+#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode  }
+#define show_wr_opcode(opcode)                             \
+__print_symbolic(opcode,                                   \
+	wr_opcode_name(RDMA_WRITE),                        \
+	wr_opcode_name(RDMA_WRITE_WITH_IMM),               \
+	wr_opcode_name(SEND),                              \
+	wr_opcode_name(SEND_WITH_IMM),                     \
+	wr_opcode_name(RDMA_READ),                         \
+	wr_opcode_name(ATOMIC_CMP_AND_SWP),                \
+	wr_opcode_name(ATOMIC_FETCH_AND_ADD),              \
+	wr_opcode_name(LSO),                               \
+	wr_opcode_name(SEND_WITH_INV),                     \
+	wr_opcode_name(RDMA_READ_WITH_INV),                \
+	wr_opcode_name(LOCAL_INV),                         \
+	wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP),         \
+	wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD))
+
+#define POS_PRN \
+"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u"
+
+TRACE_EVENT(
+	rvt_post_one_wr,
+	TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe),
+	TP_ARGS(qp, wqe),
+	TP_STRUCT__entry(
+		RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
+		__field(u64, wr_id)
+		__field(u32, qpn)
+		__field(u32, psn)
+		__field(u32, lpsn)
+		__field(u32, length)
+		__field(u32, opcode)
+		__field(u32, size)
+		__field(u32, avail)
+		__field(u32, head)
+		__field(u32, last)
+	),
+	TP_fast_assign(
+		RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device));
+		__entry->wr_id = wqe->wr.wr_id;
+		__entry->qpn = qp->ibqp.qp_num;
+		__entry->psn = wqe->psn;
+		__entry->lpsn = wqe->lpsn;
+		__entry->length = wqe->length;
+		__entry->opcode = wqe->wr.opcode;
+		__entry->size = qp->s_size;
+		__entry->avail = qp->s_avail;
+		__entry->head = qp->s_head;
+		__entry->last = qp->s_last;
+	),
+	TP_printk(
+		POS_PRN,
+		__get_str(dev),
+		__entry->wr_id,
+		__entry->qpn,
+		__entry->psn,
+		__entry->lpsn,
+		__entry->length,
+		__entry->opcode, show_wr_opcode(__entry->opcode),
+		__entry->size,
+		__entry->avail,
+		__entry->head,
+		__entry->last
+	)
+);
+
+#endif /* __RVT_TRACE_TX_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_tx
+#include <trace/define_trace.h>
+
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 6c5e29d..cd27cbd 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -420,11 +420,12 @@
 	    (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
 	    (qp->req.state == QP_STATE_ERROR)) {
 		make_send_cqe(qp, wqe, &cqe);
+		advance_consumer(qp->sq.queue);
 		rxe_cq_post(qp->scq, &cqe, 0);
+	} else {
+		advance_consumer(qp->sq.queue);
 	}
 
-	advance_consumer(qp->sq.queue);
-
 	/*
 	 * we completed something so let req run again
 	 * if it is trying to fence
@@ -510,6 +511,8 @@
 	struct rxe_pkt_info *pkt = NULL;
 	enum comp_state state;
 
+	rxe_add_ref(qp);
+
 	if (!qp->valid) {
 		while ((skb = skb_dequeue(&qp->resp_pkts))) {
 			rxe_drop_ref(qp);
@@ -739,11 +742,13 @@
 	/* we come here if we are done with processing and want the task to
 	 * exit from the loop calling us
 	 */
+	rxe_drop_ref(qp);
 	return -EAGAIN;
 
 done:
 	/* we come here if we have processed a packet we want the task to call
 	 * us again to see if there is anything else to do
 	 */
+	rxe_drop_ref(qp);
 	return 0;
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 73849a5a..efe4c6a 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -266,8 +266,6 @@
 		return err;
 	}
 
-	atomic_inc(&qp->skb_out);
-
 	if ((qp_type(qp) != IB_QPT_RC) &&
 	    (pkt->mask & RXE_END_MASK)) {
 		pkt->wqe->state = wqe_state_done;
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 1869152..d0faca2 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -355,6 +355,9 @@
 	size_t			offset;
 	u32			crc = crcp ? (*crcp) : 0;
 
+	if (length == 0)
+		return 0;
+
 	if (mem->type == RXE_MEM_TYPE_DMA) {
 		u8 *src, *dest;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index b8258e4..a576603 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -46,7 +46,7 @@
 #include "rxe_loc.h"
 
 static LIST_HEAD(rxe_dev_list);
-static spinlock_t dev_list_lock; /* spinlock for device list */
+static DEFINE_SPINLOCK(dev_list_lock); /* spinlock for device list */
 
 struct rxe_dev *net_to_rxe(struct net_device *ndev)
 {
@@ -459,6 +459,8 @@
 		return -EAGAIN;
 	}
 
+	if (pkt->qp)
+		atomic_inc(&pkt->qp->skb_out);
 	kfree_skb(skb);
 
 	return 0;
@@ -663,8 +665,6 @@
 
 int rxe_net_ipv4_init(void)
 {
-	spin_lock_init(&dev_list_lock);
-
 	recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net,
 				htons(ROCE_V2_UDP_DPORT), false);
 	if (IS_ERR(recv_sockets.sk4)) {
@@ -680,8 +680,6 @@
 {
 #if IS_ENABLED(CONFIG_IPV6)
 
-	spin_lock_init(&dev_list_lock);
-
 	recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net,
 						htons(ROCE_V2_UDP_DPORT), true);
 	if (IS_ERR(recv_sockets.sk6)) {
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index f459c43..13ed2cc 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -82,7 +82,7 @@
 	RXE_MAX_SGE			= 32,
 	RXE_MAX_SGE_RD			= 32,
 	RXE_MAX_CQ			= 16384,
-	RXE_MAX_LOG_CQE			= 13,
+	RXE_MAX_LOG_CQE			= 15,
 	RXE_MAX_MR			= 2 * 1024,
 	RXE_MAX_PD			= 0x7ffc,
 	RXE_MAX_QP_RD_ATOM		= 128,
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 6bac071..d723947 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -180,7 +180,6 @@
 	size = BITS_TO_LONGS(max - min + 1) * sizeof(long);
 	pool->table = kmalloc(size, GFP_KERNEL);
 	if (!pool->table) {
-		pr_warn("no memory for bit table\n");
 		err = -ENOMEM;
 		goto out;
 	}
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 46f0628..252b4d6 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -391,16 +391,15 @@
 			     payload_size(pkt));
 	calc_icrc = cpu_to_be32(~calc_icrc);
 	if (unlikely(calc_icrc != pack_icrc)) {
-		char saddr[sizeof(struct in6_addr)];
-
 		if (skb->protocol == htons(ETH_P_IPV6))
-			sprintf(saddr, "%pI6", &ipv6_hdr(skb)->saddr);
+			pr_warn_ratelimited("bad ICRC from %pI6c\n",
+					    &ipv6_hdr(skb)->saddr);
 		else if (skb->protocol == htons(ETH_P_IP))
-			sprintf(saddr, "%pI4", &ip_hdr(skb)->saddr);
+			pr_warn_ratelimited("bad ICRC from %pI4\n",
+					    &ip_hdr(skb)->saddr);
 		else
-			sprintf(saddr, "unknown");
+			pr_warn_ratelimited("bad ICRC from unknown\n");
 
-		pr_warn_ratelimited("bad ICRC from %s\n", saddr);
 		goto drop;
 	}
 
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 832846b..b246653 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -548,23 +548,23 @@
 static void save_state(struct rxe_send_wqe *wqe,
 		       struct rxe_qp *qp,
 		       struct rxe_send_wqe *rollback_wqe,
-		       struct rxe_qp *rollback_qp)
+		       u32 *rollback_psn)
 {
 	rollback_wqe->state     = wqe->state;
 	rollback_wqe->first_psn = wqe->first_psn;
 	rollback_wqe->last_psn  = wqe->last_psn;
-	rollback_qp->req.psn    = qp->req.psn;
+	*rollback_psn		= qp->req.psn;
 }
 
 static void rollback_state(struct rxe_send_wqe *wqe,
 			   struct rxe_qp *qp,
 			   struct rxe_send_wqe *rollback_wqe,
-			   struct rxe_qp *rollback_qp)
+			   u32 rollback_psn)
 {
 	wqe->state     = rollback_wqe->state;
 	wqe->first_psn = rollback_wqe->first_psn;
 	wqe->last_psn  = rollback_wqe->last_psn;
-	qp->req.psn    = rollback_qp->req.psn;
+	qp->req.psn    = rollback_psn;
 }
 
 static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
@@ -593,8 +593,10 @@
 	int mtu;
 	int opcode;
 	int ret;
-	struct rxe_qp rollback_qp;
 	struct rxe_send_wqe rollback_wqe;
+	u32 rollback_psn;
+
+	rxe_add_ref(qp);
 
 next_wqe:
 	if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
@@ -718,7 +720,7 @@
 	 * rxe_xmit_packet().
 	 * Otherwise, completer might initiate an unjustified retry flow.
 	 */
-	save_state(wqe, qp, &rollback_wqe, &rollback_qp);
+	save_state(wqe, qp, &rollback_wqe, &rollback_psn);
 	update_wqe_state(qp, wqe, &pkt);
 	update_wqe_psn(qp, wqe, &pkt, payload);
 	ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);
@@ -726,7 +728,7 @@
 		qp->need_req_skb = 1;
 		kfree_skb(skb);
 
-		rollback_state(wqe, qp, &rollback_wqe, &rollback_qp);
+		rollback_state(wqe, qp, &rollback_wqe, rollback_psn);
 
 		if (ret == -EAGAIN) {
 			rxe_run_task(&qp->req.task, 1);
@@ -750,9 +752,10 @@
 		while (rxe_completer(qp) == 0)
 			;
 	}
-
+	rxe_drop_ref(qp);
 	return 0;
 
 exit:
+	rxe_drop_ref(qp);
 	return -EAGAIN;
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index dd3d88a..7a36ec9 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -444,6 +444,13 @@
 		return RESPST_EXECUTE;
 	}
 
+	/* A zero-byte op is not required to set an addr or rkey. */
+	if ((pkt->mask & (RXE_READ_MASK | RXE_WRITE_OR_SEND)) &&
+	    (pkt->mask & RXE_RETH_MASK) &&
+	    reth_len(pkt) == 0) {
+		return RESPST_EXECUTE;
+	}
+
 	va	= qp->resp.va;
 	rkey	= qp->resp.rkey;
 	resid	= qp->resp.resid;
@@ -680,9 +687,14 @@
 		res->read.va_org	= qp->resp.va;
 
 		res->first_psn		= req_pkt->psn;
-		res->last_psn		= req_pkt->psn +
-					  (reth_len(req_pkt) + mtu - 1) /
-					  mtu - 1;
+
+		if (reth_len(req_pkt)) {
+			res->last_psn	= (req_pkt->psn +
+					   (reth_len(req_pkt) + mtu - 1) /
+					   mtu - 1) & BTH_PSN_MASK;
+		} else {
+			res->last_psn	= res->first_psn;
+		}
 		res->cur_psn		= req_pkt->psn;
 
 		res->read.resid		= qp->resp.resid;
@@ -742,7 +754,8 @@
 	} else {
 		qp->resp.res = NULL;
 		qp->resp.opcode = -1;
-		qp->resp.psn = res->cur_psn;
+		if (psn_compare(res->cur_psn, qp->resp.psn) >= 0)
+			qp->resp.psn = res->cur_psn;
 		state = RESPST_CLEANUP;
 	}
 
@@ -1132,6 +1145,7 @@
 					     pkt, skb_copy);
 			if (rc) {
 				pr_err("Failed resending result. This flow is not handled - skb ignored\n");
+				rxe_drop_ref(qp);
 				kfree_skb(skb_copy);
 				rc = RESPST_CLEANUP;
 				goto out;
@@ -1198,6 +1212,8 @@
 	struct rxe_pkt_info *pkt = NULL;
 	int ret = 0;
 
+	rxe_add_ref(qp);
+
 	qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
 
 	if (!qp->valid) {
@@ -1386,5 +1402,6 @@
 exit:
 	ret = -EAGAIN;
 done:
+	rxe_drop_ref(qp);
 	return ret;
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c
index 2a6e3cd..efc832a 100644
--- a/drivers/infiniband/sw/rxe/rxe_srq.c
+++ b/drivers/infiniband/sw/rxe/rxe_srq.c
@@ -169,7 +169,7 @@
 			}
 		}
 
-		err = rxe_queue_resize(q, (unsigned int *)&attr->max_wr,
+		err = rxe_queue_resize(q, &attr->max_wr,
 				       rcv_wqe_size(srq->rq.max_sge),
 				       srq->rq.queue->ip ?
 						srq->rq.queue->ip->context :
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index 1e19bf8..d2a14a1 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -121,6 +121,7 @@
 	task->arg	= arg;
 	task->func	= func;
 	snprintf(task->name, sizeof(task->name), "%s", name);
+	task->destroyed	= false;
 
 	tasklet_init(&task->tasklet, rxe_do_task, (unsigned long)task);
 
@@ -132,11 +133,29 @@
 
 void rxe_cleanup_task(struct rxe_task *task)
 {
+	unsigned long flags;
+	bool idle;
+
+	/*
+	 * Mark the task, then wait for it to finish. It might be
+	 * running in a non-tasklet (direct call) context.
+	 */
+	task->destroyed = true;
+
+	do {
+		spin_lock_irqsave(&task->state_lock, flags);
+		idle = (task->state == TASK_STATE_START);
+		spin_unlock_irqrestore(&task->state_lock, flags);
+	} while (!idle);
+
 	tasklet_kill(&task->tasklet);
 }
 
 void rxe_run_task(struct rxe_task *task, int sched)
 {
+	if (task->destroyed)
+		return;
+
 	if (sched)
 		tasklet_schedule(&task->tasklet);
 	else
diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h
index d14aa6d..08ff42d 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.h
+++ b/drivers/infiniband/sw/rxe/rxe_task.h
@@ -54,6 +54,7 @@
 	int			(*func)(void *arg);
 	int			ret;
 	char			name[16];
+	bool			destroyed;
 };
 
 /*
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 19841c8..beb7021 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -316,7 +316,9 @@
 	return err;
 }
 
-static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
+static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
+				   struct ib_udata *udata)
+
 {
 	int err;
 	struct rxe_dev *rxe = to_rdev(ibpd->device);
@@ -564,7 +566,7 @@
 	if (udata) {
 		if (udata->inlen) {
 			err = -EINVAL;
-			goto err1;
+			goto err2;
 		}
 		qp->is_user = 1;
 	}
@@ -573,12 +575,13 @@
 
 	err = rxe_qp_from_init(rxe, qp, pd, init, udata, ibpd);
 	if (err)
-		goto err2;
+		goto err3;
 
 	return &qp->ibqp;
 
-err2:
+err3:
 	rxe_drop_index(qp);
+err2:
 	rxe_drop_ref(qp);
 err1:
 	return ERR_PTR(err);
@@ -1007,11 +1010,19 @@
 static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 {
 	struct rxe_cq *cq = to_rcq(ibcq);
+	unsigned long irq_flags;
+	int ret = 0;
 
+	spin_lock_irqsave(&cq->cq_lock, irq_flags);
 	if (cq->notify != IB_CQ_NEXT_COMP)
 		cq->notify = flags & IB_CQ_SOLICITED_MASK;
 
-	return 0;
+	if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !queue_empty(cq->queue))
+		ret = 1;
+
+	spin_unlock_irqrestore(&cq->cq_lock, irq_flags);
+
+	return ret;
 }
 
 static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 4ad297d..46234f5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -355,11 +355,8 @@
 	int i;
 
 	rx->rx_ring = vzalloc(ipoib_recvq_size * sizeof *rx->rx_ring);
-	if (!rx->rx_ring) {
-		printk(KERN_WARNING "%s: failed to allocate CM non-SRQ ring (%d entries)\n",
-		       priv->ca->name, ipoib_recvq_size);
+	if (!rx->rx_ring)
 		return -ENOMEM;
-	}
 
 	t = kmalloc(sizeof *t, GFP_KERNEL);
 	if (!t) {
@@ -1053,8 +1050,6 @@
 
 	tx_qp = ib_create_qp(priv->pd, &attr);
 	if (PTR_ERR(tx_qp) == -EINVAL) {
-		ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n",
-			   priv->ca->name);
 		attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO;
 		tx_qp = ib_create_qp(priv->pd, &attr);
 	}
@@ -1133,7 +1128,6 @@
 	p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring,
 			       GFP_NOIO, PAGE_KERNEL);
 	if (!p->tx_ring) {
-		ipoib_warn(priv, "failed to allocate tx ring\n");
 		ret = -ENOMEM;
 		goto err_tx;
 	}
@@ -1549,8 +1543,6 @@
 
 	priv->cm.srq_ring = vzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring);
 	if (!priv->cm.srq_ring) {
-		printk(KERN_WARNING "%s: failed to allocate CM SRQ ring (%d entries)\n",
-		       priv->ca->name, ipoib_recvq_size);
 		ib_destroy_srq(priv->cm.srq);
 		priv->cm.srq = NULL;
 		return;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index be11d5d..43cf8b8a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -418,11 +418,8 @@
 			   "(status=%d, wrid=%d vend_err %x)\n",
 			   wc->status, wr_id, wc->vendor_err);
 		qp_work = kzalloc(sizeof(*qp_work), GFP_ATOMIC);
-		if (!qp_work) {
-			ipoib_warn(priv, "%s Failed alloc ipoib_qp_state_validate for qp: 0x%x\n",
-				   __func__, priv->qp->qp_num);
+		if (!qp_work)
 			return;
-		}
 
 		INIT_WORK(&qp_work->work, ipoib_qp_state_validate_work);
 		qp_work->priv = priv;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 5636fc3d..423b30d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1594,11 +1594,8 @@
 	/* Allocate RX/TX "rings" to hold queued skbs */
 	priv->rx_ring =	kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
 				GFP_KERNEL);
-	if (!priv->rx_ring) {
-		printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
-		       ca->name, ipoib_recvq_size);
+	if (!priv->rx_ring)
 		goto out;
-	}
 
 	priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
 	if (!priv->tx_ring) {
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index a4b791d..8ae7a3b 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -890,11 +890,14 @@
 	case RDMA_CM_EVENT_ESTABLISHED:
 		iser_connected_handler(cma_id, event->param.conn.private_data);
 		break;
+	case RDMA_CM_EVENT_REJECTED:
+		iser_info("Connection rejected: %s\n",
+			 rdma_reject_msg(cma_id, event->status));
+		/* FALLTHROUGH */
 	case RDMA_CM_EVENT_ADDR_ERROR:
 	case RDMA_CM_EVENT_ROUTE_ERROR:
 	case RDMA_CM_EVENT_CONNECT_ERROR:
 	case RDMA_CM_EVENT_UNREACHABLE:
-	case RDMA_CM_EVENT_REJECTED:
 		iser_connect_error(cma_id);
 		break;
 	case RDMA_CM_EVENT_DISCONNECTED:
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index f0ba5f8..314e955 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -184,7 +184,7 @@
 	isert_conn->rx_descs = kzalloc(ISERT_QP_MAX_RECV_DTOS *
 				sizeof(struct iser_rx_desc), GFP_KERNEL);
 	if (!isert_conn->rx_descs)
-		goto fail;
+		return -ENOMEM;
 
 	rx_desc = isert_conn->rx_descs;
 
@@ -213,9 +213,7 @@
 	}
 	kfree(isert_conn->rx_descs);
 	isert_conn->rx_descs = NULL;
-fail:
 	isert_err("conn %p failed to allocate rx descriptors\n", isert_conn);
-
 	return -ENOMEM;
 }
 
@@ -269,10 +267,8 @@
 
 	device->comps = kcalloc(device->comps_used, sizeof(struct isert_comp),
 				GFP_KERNEL);
-	if (!device->comps) {
-		isert_err("Unable to allocate completion contexts\n");
+	if (!device->comps)
 		return -ENOMEM;
-	}
 
 	max_cqe = min(ISER_MAX_CQ_LEN, device->ib_device->attrs.max_cqe);
 
@@ -432,10 +428,8 @@
 
 	isert_conn->login_req_buf = kzalloc(sizeof(*isert_conn->login_req_buf),
 			GFP_KERNEL);
-	if (!isert_conn->login_req_buf) {
-		isert_err("Unable to allocate isert_conn->login_buf\n");
+	if (!isert_conn->login_req_buf)
 		return -ENOMEM;
-	}
 
 	isert_conn->login_req_dma = ib_dma_map_single(ib_dev,
 				isert_conn->login_req_buf,
@@ -795,6 +789,8 @@
 		 */
 		return 1;
 	case RDMA_CM_EVENT_REJECTED:       /* FALLTHRU */
+		isert_info("Connection rejected: %s\n",
+			   rdma_reject_msg(cma_id, event->status));
 	case RDMA_CM_EVENT_UNREACHABLE:    /* FALLTHRU */
 	case RDMA_CM_EVENT_CONNECT_ERROR:
 		ret = isert_connect_error(cma_id);
@@ -1276,11 +1272,8 @@
 
 	if (payload_length) {
 		text_in = kzalloc(payload_length, GFP_KERNEL);
-		if (!text_in) {
-			isert_err("Unable to allocate text_in of payload_length: %u\n",
-				  payload_length);
+		if (!text_in)
 			return -ENOMEM;
-		}
 	}
 	cmd->text_in_ptr = text_in;
 
@@ -2313,10 +2306,9 @@
 	int ret;
 
 	isert_np = kzalloc(sizeof(struct isert_np), GFP_KERNEL);
-	if (!isert_np) {
-		isert_err("Unable to allocate struct isert_np\n");
+	if (!isert_np)
 		return -ENOMEM;
-	}
+
 	sema_init(&isert_np->sem, 0);
 	mutex_init(&isert_np->mutex);
 	INIT_LIST_HEAD(&isert_np->accepted);
@@ -2657,7 +2649,6 @@
 					WQ_UNBOUND | WQ_HIGHPRI, 0);
 	if (!isert_comp_wq) {
 		isert_err("Unable to allocate isert_comp_wq\n");
-		ret = -ENOMEM;
 		return -ENOMEM;
 	}
 
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index d980fb4..8ddc071 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -64,6 +64,11 @@
 MODULE_VERSION(DRV_VERSION);
 MODULE_INFO(release_date, DRV_RELDATE);
 
+#if !defined(CONFIG_DYNAMIC_DEBUG)
+#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
+#define DYNAMIC_DEBUG_BRANCH(descriptor) false
+#endif
+
 static unsigned int srp_sg_tablesize;
 static unsigned int cmd_sg_entries;
 static unsigned int indirect_sg_entries;
@@ -384,6 +389,9 @@
 				 max_page_list_len);
 		if (IS_ERR(mr)) {
 			ret = PTR_ERR(mr);
+			if (ret == -ENOMEM)
+				pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
+					dev_name(&device->dev));
 			goto destroy_pool;
 		}
 		d->mr = mr;
@@ -1266,8 +1274,12 @@
 	struct ib_pool_fmr *fmr;
 	u64 io_addr = 0;
 
-	if (state->fmr.next >= state->fmr.end)
+	if (state->fmr.next >= state->fmr.end) {
+		shost_printk(KERN_ERR, ch->target->scsi_host,
+			     PFX "Out of MRs (mr_per_cmd = %d)\n",
+			     ch->target->mr_per_cmd);
 		return -ENOMEM;
+	}
 
 	WARN_ON_ONCE(!dev->use_fmr);
 
@@ -1323,8 +1335,12 @@
 	u32 rkey;
 	int n, err;
 
-	if (state->fr.next >= state->fr.end)
+	if (state->fr.next >= state->fr.end) {
+		shost_printk(KERN_ERR, ch->target->scsi_host,
+			     PFX "Out of MRs (mr_per_cmd = %d)\n",
+			     ch->target->mr_per_cmd);
 		return -ENOMEM;
+	}
 
 	WARN_ON_ONCE(!dev->use_fast_reg);
 
@@ -1556,7 +1572,6 @@
 	return 0;
 }
 
-#if defined(DYNAMIC_DATA_DEBUG)
 static void srp_check_mapping(struct srp_map_state *state,
 			      struct srp_rdma_ch *ch, struct srp_request *req,
 			      struct scatterlist *scat, int count)
@@ -1580,7 +1595,6 @@
 		       scsi_bufflen(req->scmnd), desc_len, mr_len,
 		       state->ndesc, state->nmdesc);
 }
-#endif
 
 /**
  * srp_map_data() - map SCSI data buffer onto an SRP request
@@ -1669,14 +1683,12 @@
 	if (ret < 0)
 		goto unmap;
 
-#if defined(DYNAMIC_DEBUG)
 	{
 		DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
 			"Memory mapping consistency check");
-		if (unlikely(ddm.flags & _DPRINTK_FLAGS_PRINT))
+		if (DYNAMIC_DEBUG_BRANCH(ddm))
 			srp_check_mapping(&state, ch, req, scat, count);
 	}
-#endif
 
 	/* We've mapped the request, now pull as much of the indirect
 	 * descriptor table as we can into the command buffer. If this
@@ -3287,7 +3299,9 @@
 	 */
 	scsi_host_get(target->scsi_host);
 
-	mutex_lock(&host->add_target_mutex);
+	ret = mutex_lock_interruptible(&host->add_target_mutex);
+	if (ret < 0)
+		goto put;
 
 	ret = srp_parse_options(buf, target);
 	if (ret)
@@ -3443,6 +3457,7 @@
 out:
 	mutex_unlock(&host->add_target_mutex);
 
+put:
 	scsi_host_put(target->scsi_host);
 	if (ret < 0)
 		scsi_host_put(target->scsi_host);
@@ -3526,6 +3541,7 @@
 static void srp_add_one(struct ib_device *device)
 {
 	struct srp_device *srp_dev;
+	struct ib_device_attr *attr = &device->attrs;
 	struct srp_host *host;
 	int mr_page_shift, p;
 	u64 max_pages_per_mr;
@@ -3540,25 +3556,25 @@
 	 * minimum of 4096 bytes. We're unlikely to build large sglists
 	 * out of smaller entries.
 	 */
-	mr_page_shift		= max(12, ffs(device->attrs.page_size_cap) - 1);
+	mr_page_shift		= max(12, ffs(attr->page_size_cap) - 1);
 	srp_dev->mr_page_size	= 1 << mr_page_shift;
 	srp_dev->mr_page_mask	= ~((u64) srp_dev->mr_page_size - 1);
-	max_pages_per_mr	= device->attrs.max_mr_size;
+	max_pages_per_mr	= attr->max_mr_size;
 	do_div(max_pages_per_mr, srp_dev->mr_page_size);
 	pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
-		 device->attrs.max_mr_size, srp_dev->mr_page_size,
+		 attr->max_mr_size, srp_dev->mr_page_size,
 		 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
 	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
 					  max_pages_per_mr);
 
 	srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
 			    device->map_phys_fmr && device->unmap_fmr);
-	srp_dev->has_fr = (device->attrs.device_cap_flags &
+	srp_dev->has_fr = (attr->device_cap_flags &
 			   IB_DEVICE_MEM_MGT_EXTENSIONS);
 	if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
 		dev_warn(&device->dev, "neither FMR nor FR is supported\n");
 	} else if (!never_register &&
-		   device->attrs.max_mr_size >= 2 * srp_dev->mr_page_size) {
+		   attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
 		srp_dev->use_fast_reg = (srp_dev->has_fr &&
 					 (!srp_dev->has_fmr || prefer_fr));
 		srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
@@ -3571,13 +3587,13 @@
 	if (srp_dev->use_fast_reg) {
 		srp_dev->max_pages_per_mr =
 			min_t(u32, srp_dev->max_pages_per_mr,
-			      device->attrs.max_fast_reg_page_list_len);
+			      attr->max_fast_reg_page_list_len);
 	}
 	srp_dev->mr_max_size	= srp_dev->mr_page_size *
 				   srp_dev->max_pages_per_mr;
 	pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
-		 device->name, mr_page_shift, device->attrs.max_mr_size,
-		 device->attrs.max_fast_reg_page_list_len,
+		 device->name, mr_page_shift, attr->max_mr_size,
+		 attr->max_fast_reg_page_list_len,
 		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
 
 	INIT_LIST_HEAD(&srp_dev->dev_list);
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 84d7857..c548bea 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -1605,13 +1605,14 @@
 			r->com.from_state = r->com.state;
 			r->com.to_state = state;
 			r->com.state = RES_EQ_BUSY;
-			if (eq)
-				*eq = r;
 		}
 	}
 
 	spin_unlock_irq(mlx4_tlock(dev));
 
+	if (!err && eq)
+		*eq = r;
+
 	return err;
 }
 
diff --git a/drivers/net/ethernet/qlogic/qede/qede_roce.c b/drivers/net/ethernet/qlogic/qede/qede_roce.c
index 9867f96..4927271 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_roce.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_roce.c
@@ -191,8 +191,8 @@
 	}
 	mutex_unlock(&qedr_dev_list_lock);
 
-	DP_INFO(edev, "qedr: discovered and registered %d RoCE funcs\n",
-		qedr_counter);
+	pr_notice("qedr: discovered and registered %d RoCE funcs\n",
+		  qedr_counter);
 
 	return 0;
 }
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 5a83881..accbe8e 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -43,6 +43,28 @@
 
 #define NVME_RDMA_MAX_INLINE_SEGMENTS	1
 
+static const char *const nvme_rdma_cm_status_strs[] = {
+	[NVME_RDMA_CM_INVALID_LEN]	= "invalid length",
+	[NVME_RDMA_CM_INVALID_RECFMT]	= "invalid record format",
+	[NVME_RDMA_CM_INVALID_QID]	= "invalid queue ID",
+	[NVME_RDMA_CM_INVALID_HSQSIZE]	= "invalid host SQ size",
+	[NVME_RDMA_CM_INVALID_HRQSIZE]	= "invalid host RQ size",
+	[NVME_RDMA_CM_NO_RSC]		= "resource not found",
+	[NVME_RDMA_CM_INVALID_IRD]	= "invalid IRD",
+	[NVME_RDMA_CM_INVALID_ORD]	= "Invalid ORD",
+};
+
+/* Translate an NVMe RDMA CM reject status into a printable string. */
+static const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
+{
+	size_t index = status;
+
+	if (index < ARRAY_SIZE(nvme_rdma_cm_status_strs) &&
+	    nvme_rdma_cm_status_strs[index])
+		return nvme_rdma_cm_status_strs[index];
+	return "unrecognized reason";
+}
+
 /*
  * We handle AEN commands ourselves and don't even let the
  * block layer know about them.
@@ -1207,16 +1229,24 @@
 static int nvme_rdma_conn_rejected(struct nvme_rdma_queue *queue,
 		struct rdma_cm_event *ev)
 {
-	if (ev->param.conn.private_data_len) {
-		struct nvme_rdma_cm_rej *rej =
-			(struct nvme_rdma_cm_rej *)ev->param.conn.private_data;
+	struct rdma_cm_id *cm_id = queue->cm_id;
+	int status = ev->status;
+	const char *rej_msg;
+	const struct nvme_rdma_cm_rej *rej_data;
+	u8 rej_data_len;
+
+	rej_msg = rdma_reject_msg(cm_id, status);
+	rej_data = rdma_consumer_reject_data(cm_id, ev, &rej_data_len);
+
+	if (rej_data && rej_data_len >= sizeof(u16)) {
+		u16 sts = le16_to_cpu(rej_data->sts);
 
 		dev_err(queue->ctrl->ctrl.device,
-			"Connect rejected, status %d.", le16_to_cpu(rej->sts));
-		/* XXX: Think of something clever to do here... */
+		      "Connect rejected: status %d (%s) nvme status %d (%s).\n",
+		      status, rej_msg, sts, nvme_rdma_cm_msg(sts));
 	} else {
 		dev_err(queue->ctrl->ctrl.device,
-			"Connect rejected, no private data.\n");
+			"Connect rejected: status %d (%s).\n", status, rej_msg);
 	}
 
 	return -ECONNRESET;
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index f8d2399..50f237a 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1358,6 +1358,9 @@
 		ret = nvmet_rdma_device_removal(cm_id, queue);
 		break;
 	case RDMA_CM_EVENT_REJECTED:
+		pr_debug("Connection rejected: %s\n",
+			 rdma_reject_msg(cm_id, event->status));
+		/* FALLTHROUGH */
 	case RDMA_CM_EVENT_UNREACHABLE:
 	case RDMA_CM_EVENT_CONNECT_ERROR:
 		nvmet_rdma_queue_connect_fail(cm_id, queue);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 2632cb2..0779ad2 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -576,7 +576,7 @@
 	u8         self_lb_en_modifiable[0x1];
 	u8         reserved_at_9[0x2];
 	u8         max_lso_cap[0x5];
-	u8         reserved_at_10[0x2];
+	u8         multi_pkt_send_wqe[0x2];
 	u8	   wqe_inline_mode[0x2];
 	u8         rss_ind_tbl_cap[0x4];
 	u8         reg_umr_sq[0x1];
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 92a7d85..b49258b 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -603,4 +603,10 @@
 int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
 			struct ib_cm_sidr_rep_param *param);
 
+/**
+ * ibcm_reject_msg - return a pointer to a reject message string.
+ * @reason: Value returned in the REJECT event status field.
+ */
+const char *__attribute_const__ ibcm_reject_msg(int reason);
+
 #endif /* IB_CM_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 5ad43a4..8029d2a 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1102,6 +1102,7 @@
 	IB_QP_RESERVED2			= (1<<22),
 	IB_QP_RESERVED3			= (1<<23),
 	IB_QP_RESERVED4			= (1<<24),
+	IB_QP_RATE_LIMIT		= (1<<25),
 };
 
 enum ib_qp_state {
@@ -1151,6 +1152,7 @@
 	u8			rnr_retry;
 	u8			alt_port_num;
 	u8			alt_timeout;
+	u32			rate_limit;
 };
 
 enum ib_wr_opcode {
@@ -1592,17 +1594,19 @@
 /* Supported steering header types */
 enum ib_flow_spec_type {
 	/* L2 headers*/
-	IB_FLOW_SPEC_ETH	= 0x20,
-	IB_FLOW_SPEC_IB		= 0x22,
+	IB_FLOW_SPEC_ETH		= 0x20,
+	IB_FLOW_SPEC_IB			= 0x22,
 	/* L3 header*/
-	IB_FLOW_SPEC_IPV4	= 0x30,
-	IB_FLOW_SPEC_IPV6	= 0x31,
+	IB_FLOW_SPEC_IPV4		= 0x30,
+	IB_FLOW_SPEC_IPV6		= 0x31,
 	/* L4 headers*/
-	IB_FLOW_SPEC_TCP	= 0x40,
-	IB_FLOW_SPEC_UDP	= 0x41
+	IB_FLOW_SPEC_TCP		= 0x40,
+	IB_FLOW_SPEC_UDP		= 0x41,
+	IB_FLOW_SPEC_VXLAN_TUNNEL	= 0x50,
+	IB_FLOW_SPEC_INNER		= 0x100,
 };
 #define IB_FLOW_SPEC_LAYER_MASK	0xF0
-#define IB_FLOW_SPEC_SUPPORT_LAYERS 4
+#define IB_FLOW_SPEC_SUPPORT_LAYERS 8
 
 /* Flow steering rule priority is set according to it's domain.
  * Lower domain value means higher priority.
@@ -1630,7 +1634,7 @@
 };
 
 struct ib_flow_spec_eth {
-	enum ib_flow_spec_type	  type;
+	u32			  type;
 	u16			  size;
 	struct ib_flow_eth_filter val;
 	struct ib_flow_eth_filter mask;
@@ -1644,7 +1648,7 @@
 };
 
 struct ib_flow_spec_ib {
-	enum ib_flow_spec_type	 type;
+	u32			 type;
 	u16			 size;
 	struct ib_flow_ib_filter val;
 	struct ib_flow_ib_filter mask;
@@ -1669,7 +1673,7 @@
 };
 
 struct ib_flow_spec_ipv4 {
-	enum ib_flow_spec_type	   type;
+	u32			   type;
 	u16			   size;
 	struct ib_flow_ipv4_filter val;
 	struct ib_flow_ipv4_filter mask;
@@ -1687,7 +1691,7 @@
 };
 
 struct ib_flow_spec_ipv6 {
-	enum ib_flow_spec_type	   type;
+	u32			   type;
 	u16			   size;
 	struct ib_flow_ipv6_filter val;
 	struct ib_flow_ipv6_filter mask;
@@ -1701,15 +1705,30 @@
 };
 
 struct ib_flow_spec_tcp_udp {
-	enum ib_flow_spec_type	      type;
+	u32			      type;
 	u16			      size;
 	struct ib_flow_tcp_udp_filter val;
 	struct ib_flow_tcp_udp_filter mask;
 };
 
+struct ib_flow_tunnel_filter {
+	__be32	tunnel_id;
+	u8	real_sz[0];
+};
+
+/* ib_flow_spec_tunnel describes a VXLAN tunnel:
+ * val.tunnel_id carries the VNI value.
+ */
+struct ib_flow_spec_tunnel {
+	u32			      type;
+	u16			      size;
+	struct ib_flow_tunnel_filter  val;
+	struct ib_flow_tunnel_filter  mask;
+};
+
 union ib_flow_spec {
 	struct {
-		enum ib_flow_spec_type	type;
+		u32			type;
 		u16			size;
 	};
 	struct ib_flow_spec_eth		eth;
@@ -1717,6 +1736,7 @@
 	struct ib_flow_spec_ipv4        ipv4;
 	struct ib_flow_spec_tcp_udp	tcp_udp;
 	struct ib_flow_spec_ipv6        ipv6;
+	struct ib_flow_spec_tunnel      tunnel;
 };
 
 struct ib_flow_attr {
@@ -1933,7 +1953,8 @@
 					       struct ib_udata *udata);
 	int                        (*dealloc_pd)(struct ib_pd *pd);
 	struct ib_ah *             (*create_ah)(struct ib_pd *pd,
-						struct ib_ah_attr *ah_attr);
+						struct ib_ah_attr *ah_attr,
+						struct ib_udata *udata);
 	int                        (*modify_ah)(struct ib_ah *ah,
 						struct ib_ah_attr *ah_attr);
 	int                        (*query_ah)(struct ib_ah *ah,
@@ -2581,6 +2602,24 @@
 struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
 
 /**
+ * ib_get_gids_from_rdma_hdr - Get sgid and dgid from a GRH or IPv4
+ *   header.
+ * @hdr: the L3 header to parse
+ * @net_type: type of header to parse
+ * @sgid: place to store source gid
+ * @dgid: place to store destination gid
+ */
+int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
+			      enum rdma_network_type net_type,
+			      union ib_gid *sgid, union ib_gid *dgid);
+
+/**
+ * ib_get_rdma_header_version - Get the header version
+ * @hdr: the L3 header to parse
+ */
+int ib_get_rdma_header_version(const union rdma_network_hdr *hdr);
+
+/**
  * ib_init_ah_from_wc - Initializes address handle attributes from a
  *   work completion.
  * @device: Device on which the received message arrived.
@@ -3357,4 +3396,7 @@
 void ib_drain_rq(struct ib_qp *qp);
 void ib_drain_sq(struct ib_qp *qp);
 void ib_drain_qp(struct ib_qp *qp);
+
+int ib_resolve_eth_dmac(struct ib_device *device,
+			struct ib_ah_attr *ah_attr);
 #endif /* IB_VERBS_H */
diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h
index 6d0065c..5cd7701 100644
--- a/include/rdma/iw_cm.h
+++ b/include/rdma/iw_cm.h
@@ -253,4 +253,10 @@
 int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, struct ib_qp_attr *qp_attr,
 		       int *qp_attr_mask);
 
+/**
+ * iwcm_reject_msg - return a pointer to a reject message string.
+ * @reason: Value returned in the REJECT event status field.
+ */
+const char *__attribute_const__ iwcm_reject_msg(int reason);
+
 #endif /* IW_CM_H */
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 81fb1d1..d3968b5 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -388,4 +388,29 @@
  */
 __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr);
 
+/**
+ * rdma_reject_msg - return a pointer to a reject message string.
+ * @id: Communication identifier that received the REJECT event.
+ * @reason: Value returned in the REJECT event status field.
+ */
+const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
+						int reason);
+/**
+ * rdma_is_consumer_reject - return true if the consumer rejected the connect
+ *                           request.
+ * @id: Communication identifier that received the REJECT event.
+ * @reason: Value returned in the REJECT event status field.
+ */
+bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason);
+
+/**
+ * rdma_consumer_reject_data - return the consumer reject private data and
+ *			       length, if any.
+ * @id: Communication identifier that received the REJECT event.
+ * @ev: RDMA CM reject event.
+ * @data_len: Pointer to the resulting length of the consumer data.
+ */
+const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
+				      struct rdma_cm_event *ev, u8 *data_len);
+
 #endif /* RDMA_CM_H */
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index e315021..861e23e 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -185,6 +185,27 @@
 	 * check_support() for details.
 	 */
 
+	/* hot path calldowns in a single cacheline */
+
+	/*
+	 * Give the driver a notice that there is send work to do. It is up to
+	 * the driver to generally push the packets out, this just queues the
+	 * work with the driver. There are two variants here. The no_lock
+	 * version requires the s_lock not to be held. The other assumes the
+	 * s_lock is held.
+	 */
+	void (*schedule_send)(struct rvt_qp *qp);
+	void (*schedule_send_no_lock)(struct rvt_qp *qp);
+
+	/* Driver specific work request checking */
+	int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe);
+
+	/*
+	 * Sometimes rdmavt needs to kick the driver's send progress. That is
+	 * done by this call back.
+	 */
+	void (*do_send)(struct rvt_qp *qp);
+
 	/* Passed to ib core registration. Callback to create syfs files */
 	int (*port_callback)(struct ib_device *, u8, struct kobject *);
 
@@ -223,22 +244,6 @@
 	void (*notify_qp_reset)(struct rvt_qp *qp);
 
 	/*
-	 * Give the driver a notice that there is send work to do. It is up to
-	 * the driver to generally push the packets out, this just queues the
-	 * work with the driver. There are two variants here. The no_lock
-	 * version requires the s_lock not to be held. The other assumes the
-	 * s_lock is held.
-	 */
-	void (*schedule_send)(struct rvt_qp *qp);
-	void (*schedule_send_no_lock)(struct rvt_qp *qp);
-
-	/*
-	 * Sometimes rdmavt needs to kick the driver's send progress. That is
-	 * done by this call back.
-	 */
-	void (*do_send)(struct rvt_qp *qp);
-
-	/*
 	 * Get a path mtu from the driver based on qp attributes.
 	 */
 	int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
@@ -324,9 +329,6 @@
 	void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
 			  int attr_mask, struct ib_udata *udata);
 
-	/* Driver specific work request checking */
-	int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe);
-
 	/* Notify driver a mad agent has been created */
 	void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx);
 
@@ -355,12 +357,12 @@
 	/* post send table */
 	const struct rvt_operation_params *post_parms;
 
-	struct rvt_mregion __rcu *dma_mr;
-	struct rvt_lkey_table lkey_table;
-
 	/* Driver specific helper functions */
 	struct rvt_driver_provided driver_f;
 
+	struct rvt_mregion __rcu *dma_mr;
+	struct rvt_lkey_table lkey_table;
+
 	/* Internal use */
 	int n_pds_allocated;
 	spinlock_t n_pds_lock; /* Protect pd allocated count */
diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h
index 6b3c6c8..de59de2 100644
--- a/include/rdma/rdmavt_mr.h
+++ b/include/rdma/rdmavt_mr.h
@@ -90,11 +90,15 @@
 #define RVT_MAX_LKEY_TABLE_BITS 23
 
 struct rvt_lkey_table {
-	spinlock_t lock; /* protect changes in this struct */
+	/* read mostly fields */
+	u32 max;                /* size of the table */
+	u32 shift;              /* lkey/rkey shift */
+	struct rvt_mregion __rcu **table;
+	/* writeable fields */
+	/* protect changes in this struct */
+	spinlock_t lock ____cacheline_aligned_in_smp;
 	u32 next;               /* next unused index (speeds search) */
 	u32 gen;                /* generation count */
-	u32 max;                /* size of the table */
-	struct rvt_mregion __rcu **table;
 };
 
 /*
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 2c5183e..f3dbd15 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -51,6 +51,7 @@
 #include <rdma/rdma_vt.h>
 #include <rdma/ib_pack.h>
 #include <rdma/ib_verbs.h>
+#include <rdma/rdmavt_cq.h>
 /*
  * Atomic bit definitions for r_aflags.
  */
@@ -485,6 +486,23 @@
 }
 
 /**
+ * rvt_put_swqe - drop mr refs held by swqe
+ * @wqe - the send wqe
+ *
+ * This drops any mr references held by the swqe
+ */
+static inline void rvt_put_swqe(struct rvt_swqe *wqe)
+{
+	int idx = 0;
+
+	/* one rvt_put_mr() per sge, walking sg_list in index order */
+	while (idx < wqe->wr.num_sge) {
+		rvt_put_mr(wqe->sg_list[idx].mr);
+		idx++;
+	}
+}
+
+/**
  * rvt_qp_wqe_reserve - reserve operation
  * @qp - the rvt qp
  * @wqe - the send wqe
@@ -527,6 +545,65 @@
 	}
 }
 
+extern const enum ib_wc_opcode ib_rvt_wc_opcode[];
+
+/**
+ * rvt_qp_swqe_complete() - insert send completion
+ * @qp - the qp
+ * @wqe - the send wqe
+ * @status - completion status
+ *
+ * Enter a wc for @wqe into the qp's send cq.  Skipped when the wqe has
+ * RVT_SEND_RESERVE_USED set, and also when all of these hold: the qp has
+ * RVT_S_SIGNAL_REQ_WR, the wqe lacks IB_SEND_SIGNALED, @status is
+ * IB_WC_SUCCESS.  See IBTA 10.7.3.1 for info on completion control.
+ */
+static inline void rvt_qp_swqe_complete(
+	struct rvt_qp *qp,
+	struct rvt_swqe *wqe,
+	enum ib_wc_status status)
+{
+	struct ib_wc wc;
+
+	if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED))
+		return;
+	if ((qp->s_flags & RVT_S_SIGNAL_REQ_WR) &&
+	    !(wqe->wr.send_flags & IB_SEND_SIGNALED) &&
+	    status == IB_WC_SUCCESS)
+		return;
+
+	memset(&wc, 0, sizeof(wc));
+	wc.wr_id = wqe->wr.wr_id;
+	wc.status = status;
+	wc.opcode = ib_rvt_wc_opcode[wqe->wr.opcode];
+	wc.qp = &qp->ibqp;
+	wc.byte_len = wqe->length;
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
+		     status != IB_WC_SUCCESS);
+}
+
+/**
+ * rvt_div_round_up_mtu - perform a shift based mtu round up divide
+ * @qp - the qp pair
+ * @len - the length
+ * Return: (@len + qp->pmtu - 1) shifted right by qp->log_pmtu
+ */
+static inline u32 rvt_div_round_up_mtu(struct rvt_qp *qp, u32 len)
+{
+	return (len + qp->pmtu - 1) >> qp->log_pmtu;
+}
+
+/**
+ * rvt_div_mtu - perform a shift based mtu divide
+ * @qp - the qp pair
+ * @len - the length
+ * Return: @len shifted right by qp->log_pmtu
+ */
+static inline u32 rvt_div_mtu(struct rvt_qp *qp, u32 len)
+{
+	return len >> qp->log_pmtu;
+}
+
 extern const int  ib_rvt_state_ops[];
 
 struct rvt_dev_info;
diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild
index f14ab7f..b54f10d 100644
--- a/include/uapi/rdma/Kbuild
+++ b/include/uapi/rdma/Kbuild
@@ -14,3 +14,4 @@
 header-y += mthca-abi.h
 header-y += nes-abi.h
 header-y += ocrdma-abi.h
+header-y += hns-abi.h
diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h
index d15e728..587b736 100644
--- a/include/uapi/rdma/hfi/hfi1_user.h
+++ b/include/uapi/rdma/hfi/hfi1_user.h
@@ -75,7 +75,7 @@
  * may not be implemented; the user code must deal with this if it
  * cares, or it must abort after initialization reports the difference.
  */
-#define HFI1_USER_SWMINOR 2
+#define HFI1_USER_SWMINOR 3
 
 /*
  * We will encode the major/minor inside a single 32bit version number.
diff --git a/drivers/infiniband/hw/hns/hns_roce_user.h b/include/uapi/rdma/hns-abi.h
similarity index 93%
rename from drivers/infiniband/hw/hns/hns_roce_user.h
rename to include/uapi/rdma/hns-abi.h
index a28f761..5d74019 100644
--- a/drivers/infiniband/hw/hns/hns_roce_user.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -30,8 +30,10 @@
  * SOFTWARE.
  */
 
-#ifndef _HNS_ROCE_USER_H
-#define _HNS_ROCE_USER_H
+#ifndef HNS_ABI_USER_H
+#define HNS_ABI_USER_H
+
+#include <linux/types.h>
 
 struct hns_roce_ib_create_cq {
 	__u64   buf_addr;
@@ -49,5 +51,4 @@
 struct hns_roce_ib_alloc_ucontext_resp {
 	__u32	qp_tab_size;
 };
-
-#endif /*_HNS_ROCE_USER_H */
+#endif /* HNS_ABI_USER_H */
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 25225eb..dfdfe4e 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -37,6 +37,7 @@
 #define IB_USER_VERBS_H
 
 #include <linux/types.h>
+#include <rdma/ib_verbs.h>
 
 /*
  * Increment this value if any changes that break userspace ABI
@@ -93,6 +94,7 @@
 	IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE,
 	IB_USER_VERBS_EX_CMD_CREATE_CQ = IB_USER_VERBS_CMD_CREATE_CQ,
 	IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP,
+	IB_USER_VERBS_EX_CMD_MODIFY_QP = IB_USER_VERBS_CMD_MODIFY_QP,
 	IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
 	IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
 	IB_USER_VERBS_EX_CMD_CREATE_WQ,
@@ -545,6 +547,14 @@
 	IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE,
 };
 
+/*
+ * Aliases for the legacy and the current last qp attribute mask bit.
+ */
+enum {
+	IB_USER_LEGACY_LAST_QP_ATTR_MASK = IB_QP_DEST_QPN,
+	IB_USER_LAST_QP_ATTR_MASK = IB_QP_RATE_LIMIT
+};
+
 struct ib_uverbs_ex_create_qp {
 	__u64 user_handle;
 	__u32 pd_handle;
@@ -684,9 +694,20 @@
 	__u64 driver_data[0];
 };
 
+struct ib_uverbs_ex_modify_qp {
+	struct ib_uverbs_modify_qp base;
+	__u32	rate_limit;
+	__u32	reserved;
+};
+
 struct ib_uverbs_modify_qp_resp {
 };
 
+struct ib_uverbs_ex_modify_qp_resp {
+	__u32  comp_mask;
+	__u32  response_length;
+};
+
 struct ib_uverbs_destroy_qp {
 	__u64 response;
 	__u32 qp_handle;
@@ -908,6 +929,23 @@
 	struct ib_uverbs_flow_ipv6_filter mask;
 };
 
+struct ib_uverbs_flow_tunnel_filter {
+	__be32 tunnel_id;
+};
+
+struct ib_uverbs_flow_spec_tunnel {
+	union {
+		struct ib_uverbs_flow_spec_hdr hdr;
+		struct {
+			__u32 type;
+			__u16 size;
+			__u16 reserved;
+		};
+	};
+	struct ib_uverbs_flow_tunnel_filter val;
+	struct ib_uverbs_flow_tunnel_filter mask;
+};
+
 struct ib_uverbs_flow_attr {
 	__u32 type;
 	__u16 size;
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index f5d0f4e..fae6cda 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -82,6 +82,7 @@
 
 enum mlx5_user_cmds_supp_uhw {
 	MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE = 1 << 0,
+	MLX5_USER_CMDS_SUPP_UHW_CREATE_AH    = 1 << 1,
 };
 
 struct mlx5_ib_alloc_ucontext_resp {
@@ -124,18 +125,47 @@
 	__u8 reserved[7];
 };
 
+enum mlx5_ib_cqe_comp_res_format {
+	MLX5_IB_CQE_RES_FORMAT_HASH	= 1 << 0,
+	MLX5_IB_CQE_RES_FORMAT_CSUM	= 1 << 1,
+	MLX5_IB_CQE_RES_RESERVED	= 1 << 2,
+};
+
+struct mlx5_ib_cqe_comp_caps {
+	__u32 max_num;
+	__u32 supported_format; /* enum mlx5_ib_cqe_comp_res_format */
+};
+
+struct mlx5_packet_pacing_caps {
+	__u32 qp_rate_limit_min; /* presumably kbps as well - TODO confirm */
+	__u32 qp_rate_limit_max; /* In kbps */
+
+	/* Corresponding bit will be set if qp type from
+	 * 'enum ib_qp_type' is supported, e.g.
+	 * supported_qpts |= 1 << IB_QPT_RAW_PACKET
+	 */
+	__u32 supported_qpts;
+	__u32 reserved;
+};
+
 struct mlx5_ib_query_device_resp {
 	__u32	comp_mask;
 	__u32	response_length;
 	struct	mlx5_ib_tso_caps tso_caps;
 	struct	mlx5_ib_rss_caps rss_caps;
+	struct	mlx5_ib_cqe_comp_caps cqe_comp_caps;
+	struct	mlx5_packet_pacing_caps packet_pacing_caps;
+	__u32	mlx5_ib_support_multi_pkt_send_wqes;
+	__u32	reserved;
 };
 
 struct mlx5_ib_create_cq {
 	__u64	buf_addr;
 	__u64	db_addr;
 	__u32	cqe_size;
-	__u32	reserved; /* explicit padding (optional on i386) */
+	__u8    cqe_comp_en;
+	__u8    cqe_comp_res_format;
+	__u16	reserved; /* explicit padding (optional on i386) */
 };
 
 struct mlx5_ib_create_cq_resp {
@@ -232,6 +262,12 @@
 	__u32   reserved;
 };
 
+struct mlx5_ib_create_ah_resp {
+	__u32	response_length;
+	__u8	dmac[ETH_ALEN];
+	__u8	reserved[6];
+};
+
 struct mlx5_ib_create_wq_resp {
 	__u32	response_length;
 	__u32	reserved;
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 345f090..d5f3117 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -100,11 +100,14 @@
 		trans->cm_connect_complete(conn, event);
 		break;
 
+	case RDMA_CM_EVENT_REJECTED:
+		rdsdebug("Connection rejected: %s\n",
+			 rdma_reject_msg(cm_id, event->status));
+		/* FALLTHROUGH */
 	case RDMA_CM_EVENT_ADDR_ERROR:
 	case RDMA_CM_EVENT_ROUTE_ERROR:
 	case RDMA_CM_EVENT_CONNECT_ERROR:
 	case RDMA_CM_EVENT_UNREACHABLE:
-	case RDMA_CM_EVENT_REJECTED:
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 	case RDMA_CM_EVENT_ADDR_CHANGE:
 		if (conn)